Merge pull request #9705 from AlexeyAB:dnn_darknet_yolo_v2
author    Vadim Pisarevsky <vadim.pisarevsky@gmail.com>
          Tue, 10 Oct 2017 12:02:03 +0000
committer Vadim Pisarevsky <vadim.pisarevsky@gmail.com>
          Tue, 10 Oct 2017 12:02:03 +0000
377 files changed:
3rdparty/ffmpeg/ffmpeg.cmake
3rdparty/libjpeg/CMakeLists.txt
CMakeLists.txt
cmake/OpenCVCompilerOptimizations.cmake
cmake/OpenCVDetectCUDA.cmake
cmake/OpenCVDetectCXXCompiler.cmake
cmake/OpenCVDetectTBB.cmake
cmake/OpenCVFindIPPIW.cmake
cmake/OpenCVFindLibsGrfmt.cmake
cmake/OpenCVPackaging.cmake
cmake/checks/cpu_vsx.cpp [new file with mode: 0644]
doc/CMakeLists.txt
doc/Doxyfile.in
doc/js_tutorials/js_assets/.eslintrc.json [new file with mode: 0644]
doc/js_tutorials/js_assets/apple.jpg [new file with mode: 0644]
doc/js_tutorials/js_assets/coins.jpg [new file with mode: 0644]
doc/js_tutorials/js_assets/cup.mp4 [new file with mode: 0644]
doc/js_tutorials/js_assets/handDst.jpg [new file with mode: 0644]
doc/js_tutorials/js_assets/handSrc.jpg [new file with mode: 0644]
doc/js_tutorials/js_assets/js_basic_ops_copymakeborder.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_basic_ops_roi.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_bg_subtraction.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_camshift.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_canny.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_colorspaces_cvtColor.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_colorspaces_inRange.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_approxPolyDP.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_area.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_boundingRect.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_convexHull.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_fitEllipse.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_fitLine.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_minAreaRect.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_minEnclosingCircle.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_moments.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_features_perimeter.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contour_properties_transpose.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contours_begin_contours.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contours_more_functions_convexityDefects.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_contours_more_functions_shape.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_example_style.css [new file with mode: 0644]
doc/js_tutorials/js_assets/js_face_detection.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_face_detection_camera.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_filtering_GaussianBlur.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_filtering_bilateralFilter.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_filtering_blur.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_filtering_filter.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_filtering_medianBlur.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_fourier_transform_dft.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_geometric_transformations_getAffineTransform.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_geometric_transformations_resize.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_geometric_transformations_rotateWarpAffine.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_geometric_transformations_warpAffine.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_geometric_transformations_warpPerspective.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_grabcut_grabCut.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_gradients_Laplacian.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_gradients_Sobel.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_gradients_absSobel.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_histogram_backprojection_calcBackProject.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_histogram_begins_calcHist.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_histogram_equalization_createCLAHE.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_histogram_equalization_equalizeHist.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_houghcircles_HoughCirclesP.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_houghlines_HoughLines.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_houghlines_HoughLinesP.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_image_arithmetics_bitwise.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_image_display.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_imgproc_camera.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_meanshift.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_morphological_ops_blackHat.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_morphological_ops_closing.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_morphological_ops_dilate.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_morphological_ops_erode.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_morphological_ops_getStructuringElement.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_morphological_ops_gradient.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_morphological_ops_opening.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_morphological_ops_topHat.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_optical_flow_dense.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_optical_flow_lucas_kanade.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_pyramids_pyrDown.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_pyramids_pyrUp.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_setup_usage.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_thresholding_adaptiveThreshold.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_thresholding_threshold.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_trackbar.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_video_display.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_watershed_background.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_watershed_distanceTransform.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_watershed_foreground.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_watershed_threshold.html [new file with mode: 0644]
doc/js_tutorials/js_assets/js_watershed_watershed.html [new file with mode: 0644]
doc/js_tutorials/js_assets/lena.jpg [new file with mode: 0644]
doc/js_tutorials/js_assets/lenaFace.png [new file with mode: 0644]
doc/js_tutorials/js_assets/opencv_logo.jpg [new file with mode: 0644]
doc/js_tutorials/js_assets/orange.jpg [new file with mode: 0644]
doc/js_tutorials/js_assets/shape.jpg [new file with mode: 0644]
doc/js_tutorials/js_assets/utils.js [new file with mode: 0644]
doc/js_tutorials/js_core/js_basic_ops/js_basic_ops.markdown [new file with mode: 0644]
doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown [new file with mode: 0644]
doc/js_tutorials/js_core/js_some_data_structures/js_image_arithmetics.markdown [new file with mode: 0644]
doc/js_tutorials/js_core/js_table_of_contents_core.markdown [new file with mode: 0644]
doc/js_tutorials/js_gui/js_image_display/images/Imread_Imshow_Tutorial_Result.png [new file with mode: 0644]
doc/js_tutorials/js_gui/js_image_display/js_image_display.markdown [new file with mode: 0644]
doc/js_tutorials/js_gui/js_table_of_contents_gui.markdown [new file with mode: 0644]
doc/js_tutorials/js_gui/js_trackbar/images/Trackbar_Tutorial_Range.png [new file with mode: 0644]
doc/js_tutorials/js_gui/js_trackbar/images/Trackbar_Tutorial_Result.png [new file with mode: 0644]
doc/js_tutorials/js_gui/js_trackbar/js_trackbar.markdown [new file with mode: 0644]
doc/js_tutorials/js_gui/js_video_display/js_video_display.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_canny/js_canny.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_colorspaces/js_colorspaces.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_contours/js_contour_features/js_contour_features.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_contours/js_contour_properties/js_contour_properties.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_contours/js_contours_begin/js_contours_begin.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_contours/js_contours_hierarchy/js_contours_hierarchy.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_contours/js_contours_more_functions/js_contours_more_functions.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_contours/js_table_of_contents_contours.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_filtering/js_filtering.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_geometric_transformations/js_geometric_transformations.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_grabcut/js_grabcut.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_gradients/js_gradients.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_histograms/js_histogram_backprojection/js_histogram_backprojection.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_histograms/js_histogram_begins/js_histogram_begins.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_histograms/js_histogram_equalization/js_histogram_equalization.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_histograms/js_table_of_contents_histograms.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_houghcircles/js_houghcircles.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_houghlines/js_houghlines.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_imgproc_camera/js_imgproc_camera.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_morphological_ops/js_morphological_ops.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_pyramids/js_pyramids.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_table_of_contents_imgproc.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_template_matching/js_template_matching.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_thresholding/js_thresholding.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_transforms/js_fourier_transform/js_fourier_transform.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_transforms/js_table_of_contents_transforms.markdown [new file with mode: 0644]
doc/js_tutorials/js_imgproc/js_watershed/js_watershed.markdown [new file with mode: 0644]
doc/js_tutorials/js_objdetect/js_face_detection/js_face_detection.markdown [new file with mode: 0644]
doc/js_tutorials/js_objdetect/js_face_detection/js_face_detection_camera.markdown [new file with mode: 0644]
doc/js_tutorials/js_objdetect/js_table_of_contents_objdetect.markdown [new file with mode: 0644]
doc/js_tutorials/js_setup/js_intro/js_intro.markdown [new file with mode: 0644]
doc/js_tutorials/js_setup/js_setup/js_setup.markdown [new file with mode: 0644]
doc/js_tutorials/js_setup/js_table_of_contents_setup.markdown [new file with mode: 0644]
doc/js_tutorials/js_setup/js_usage/js_usage.markdown [new file with mode: 0644]
doc/js_tutorials/js_tutorials.markdown [new file with mode: 0644]
doc/js_tutorials/js_video/js_bg_subtraction/js_bg_subtraction.markdown [new file with mode: 0644]
doc/js_tutorials/js_video/js_lucas_kanade/images/optical_flow_basic1.jpg [new file with mode: 0644]
doc/js_tutorials/js_video/js_lucas_kanade/js_lucas_kanade.markdown [new file with mode: 0644]
doc/js_tutorials/js_video/js_meanshift/images/camshift_face.gif [new file with mode: 0644]
doc/js_tutorials/js_video/js_meanshift/images/meanshift_basics.jpg [new file with mode: 0644]
doc/js_tutorials/js_video/js_meanshift/images/meanshift_face.gif [new file with mode: 0644]
doc/js_tutorials/js_video/js_meanshift/js_meanshift.markdown [new file with mode: 0644]
doc/js_tutorials/js_video/js_table_of_contents_video.markdown [new file with mode: 0644]
doc/opencv.bib
doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown
doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown
doc/py_tutorials/py_feature2d/py_surf_intro/py_surf_intro.markdown
doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown
doc/py_tutorials/py_imgproc/py_histograms/py_histogram_begins/py_histogram_begins.markdown
doc/root.markdown.in
doc/tutorials/imgproc/gausian_median_blur_bilateral_filter/gausian_median_blur_bilateral_filter.markdown
doc/tutorials/imgproc/hitOrMiss/hitOrMiss.markdown
doc/tutorials/imgproc/imgtrans/copyMakeBorder/copyMakeBorder.markdown
doc/tutorials/imgproc/imgtrans/filter_2d/filter_2d.markdown
doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.markdown
doc/tutorials/imgproc/imgtrans/hough_circle/images/Hough_Circle_Tutorial_Result.png [new file with mode: 0644]
doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.markdown
doc/tutorials/imgproc/imgtrans/hough_lines/images/hough_lines_result1.png [new file with mode: 0644]
doc/tutorials/imgproc/imgtrans/hough_lines/images/hough_lines_result2.png [new file with mode: 0644]
doc/tutorials/imgproc/imgtrans/laplace_operator/laplace_operator.markdown
doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.markdown
doc/tutorials/imgproc/morph_lines_detection/moprh_lines_detection.md [deleted file]
doc/tutorials/imgproc/morph_lines_detection/morph_lines_detection.md [new file with mode: 0644]
doc/tutorials/imgproc/pyramids/pyramids.markdown
doc/tutorials/imgproc/table_of_content_imgproc.markdown
doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown [new file with mode: 0644]
doc/tutorials/introduction/table_of_content_introduction.markdown
doc/tutorials/viz/histo3D/histo3D.markdown [new file with mode: 0644]
doc/tutorials/viz/histo3D/images/histo50.png [new file with mode: 0644]
doc/tutorials/viz/table_of_content_viz.markdown
modules/calib3d/include/opencv2/calib3d.hpp
modules/calib3d/misc/java/gen_dict.json
modules/calib3d/src/calibinit.cpp
modules/calib3d/src/circlesgrid.cpp
modules/calib3d/src/circlesgrid.hpp
modules/calib3d/src/fisheye.cpp
modules/core/CMakeLists.txt
modules/core/include/opencv2/core.hpp
modules/core/include/opencv2/core/base.hpp
modules/core/include/opencv2/core/cv_cpu_dispatch.h
modules/core/include/opencv2/core/cv_cpu_helper.h
modules/core/include/opencv2/core/cvdef.h
modules/core/include/opencv2/core/hal/intrin.hpp
modules/core/include/opencv2/core/hal/intrin_vsx.hpp [new file with mode: 0644]
modules/core/include/opencv2/core/mat.hpp
modules/core/include/opencv2/core/ocl.hpp
modules/core/include/opencv2/core/utils/configuration.private.hpp [new file with mode: 0644]
modules/core/include/opencv2/core/utils/logger.hpp
modules/core/include/opencv2/core/vsx_utils.hpp [new file with mode: 0644]
modules/core/src/arithm.cpp
modules/core/src/command_line_parser.cpp
modules/core/src/lda.cpp
modules/core/src/matrix.cpp
modules/core/src/ocl.cpp
modules/core/src/ocl_deprecated.hpp
modules/core/src/precomp.hpp
modules/core/src/softfloat.cpp
modules/core/src/stat.cpp
modules/core/src/system.cpp
modules/core/src/trace.cpp
modules/core/src/umatrix.cpp
modules/core/test/test_eigen.cpp
modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp
modules/dnn/CMakeLists.txt
modules/dnn/include/opencv2/dnn/all_layers.hpp
modules/dnn/include/opencv2/dnn/dnn.hpp
modules/dnn/perf/opencl/perf_convolution.cpp [new file with mode: 0644]
modules/dnn/perf/perf_convolution.cpp
modules/dnn/perf/perf_halide_net.cpp [deleted file]
modules/dnn/perf/perf_net.cpp [new file with mode: 0644]
modules/dnn/perf/perf_precomp.hpp
modules/dnn/src/caffe/caffe_importer.cpp
modules/dnn/src/dnn.cpp
modules/dnn/src/init.cpp
modules/dnn/src/layers/concat_layer.cpp
modules/dnn/src/layers/convolution_layer.cpp
modules/dnn/src/layers/detection_output_layer.cpp
modules/dnn/src/layers/elementwise_layers.cpp
modules/dnn/src/layers/eltwise_layer.cpp
modules/dnn/src/layers/fully_connected_layer.cpp
modules/dnn/src/layers/layers_common.hpp
modules/dnn/src/layers/layers_common.simd.hpp
modules/dnn/src/layers/lp_normalize_layer.cpp
modules/dnn/src/layers/lrn_layer.cpp
modules/dnn/src/layers/padding_layer.cpp
modules/dnn/src/layers/pooling_layer.cpp
modules/dnn/src/layers/prior_box_layer.cpp
modules/dnn/src/layers/recurrent_layers.cpp
modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp [new file with mode: 0644]
modules/dnn/src/layers/scale_layer.cpp
modules/dnn/src/layers/slice_layer.cpp
modules/dnn/src/layers/softmax_layer.cpp
modules/dnn/src/ocl4dnn/include/common.hpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/include/math_functions.hpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/src/common.cpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/src/math_functions.cpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/src/ocl4dnn_inner_product.cpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp [new file with mode: 0644]
modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp [new file with mode: 0644]
modules/dnn/src/opencl/activations.cl
modules/dnn/src/opencl/batchnorm.cl [new file with mode: 0644]
modules/dnn/src/opencl/concat.cl [new file with mode: 0644]
modules/dnn/src/opencl/conv_layer_spatial.cl [new file with mode: 0644]
modules/dnn/src/opencl/conv_spatial_helper.cl [new file with mode: 0644]
modules/dnn/src/opencl/dummy.cl [new file with mode: 0644]
modules/dnn/src/opencl/gemm_image.cl [new file with mode: 0644]
modules/dnn/src/opencl/math.cl [new file with mode: 0644]
modules/dnn/src/opencl/matvec_mul.cl [new file with mode: 0644]
modules/dnn/src/opencl/ocl4dnn_lrn.cl [new file with mode: 0644]
modules/dnn/src/opencl/ocl4dnn_pooling.cl [new file with mode: 0644]
modules/dnn/src/opencl/softmax.cl
modules/dnn/src/opencl/softmax_loss.cl [new file with mode: 0644]
modules/dnn/src/precomp.hpp
modules/dnn/src/tensorflow/tf_importer.cpp
modules/dnn/src/tensorflow/tf_io.cpp
modules/dnn/src/tensorflow/tf_io.hpp
modules/dnn/src/torch/torch_importer.cpp
modules/dnn/test/test_caffe_importer.cpp
modules/dnn/test/test_googlenet.cpp
modules/dnn/test/test_halide_layers.cpp
modules/dnn/test/test_layers.cpp
modules/dnn/test/test_tf_importer.cpp
modules/dnn/test/test_torch_importer.cpp
modules/highgui/CMakeLists.txt
modules/imgcodecs/src/grfmt_bmp.cpp
modules/imgcodecs/src/grfmt_tiff.cpp
modules/imgcodecs/src/grfmt_tiff.hpp
modules/imgcodecs/src/loadsave.cpp
modules/imgproc/CMakeLists.txt
modules/imgproc/include/opencv2/imgproc.hpp
modules/imgproc/perf/perf_resize.cpp
modules/imgproc/src/featureselect.cpp
modules/imgproc/src/hough.cpp
modules/imgproc/src/shapedescr.cpp
modules/imgproc/src/smooth.cpp
modules/imgproc/src/undistort.avx2.cpp
modules/imgproc/test/test_fitellipseAMS.cpp [new file with mode: 0644]
modules/imgproc/test/test_fitellipseDirect.cpp [new file with mode: 0644]
modules/js/CMakeLists.txt [new file with mode: 0644]
modules/js/src/.eslintrc.json [new file with mode: 0644]
modules/js/src/core_bindings.cpp [new file with mode: 0644]
modules/js/src/embindgen.py [new file with mode: 0644]
modules/js/src/helpers.js [new file with mode: 0644]
modules/js/src/make_umd.py [new file with mode: 0644]
modules/js/src/templates.py [new file with mode: 0644]
modules/js/test/.eslintrc.json [new file with mode: 0644]
modules/js/test/package.json [new file with mode: 0644]
modules/js/test/test_imgproc.js [new file with mode: 0644]
modules/js/test/test_mat.js [new file with mode: 0644]
modules/js/test/test_objdetect.js [new file with mode: 0644]
modules/js/test/test_utils.js [new file with mode: 0644]
modules/js/test/test_video.js [new file with mode: 0644]
modules/js/test/tests.html [new file with mode: 0644]
modules/js/test/tests.js [new file with mode: 0644]
modules/objdetect/CMakeLists.txt
modules/objdetect/include/opencv2/objdetect.hpp
modules/objdetect/src/cascadedetect.cpp
modules/python/src2/hdr_parser.py
modules/python/test/test_feature_homography.py
modules/stitching/perf/perf_matchers.cpp
modules/stitching/src/exposure_compensate.cpp
modules/ts/include/opencv2/ts.hpp
modules/ts/include/opencv2/ts/cuda_perf.hpp
modules/ts/include/opencv2/ts/ocl_perf.hpp
modules/ts/include/opencv2/ts/ocl_test.hpp
modules/ts/include/opencv2/ts/ts_ext.hpp
modules/ts/include/opencv2/ts/ts_perf.hpp
modules/ts/src/ts_func.cpp
modules/video/CMakeLists.txt
modules/videoio/CMakeLists.txt
modules/videoio/src/cap_dc1394_v2.cpp
modules/videoio/src/cap_ios_video_camera.mm
platforms/js/README.md [new file with mode: 0644]
platforms/js/build_js.py [new file with mode: 0644]
samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java
samples/cpp/fitellipse.cpp
samples/cpp/houghcircles.cpp [deleted file]
samples/cpp/houghlines.cpp [deleted file]
samples/cpp/tutorial_code/ImgProc/HitMiss/HitMiss.cpp [moved from samples/cpp/tutorial_code/ImgProc/HitMiss.cpp with 88% similarity]
samples/cpp/tutorial_code/ImgProc/Pyramids.cpp [deleted file]
samples/cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp [new file with mode: 0644]
samples/cpp/tutorial_code/ImgProc/Smoothing.cpp [deleted file]
samples/cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp [new file with mode: 0644]
samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp [moved from samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp with 68% similarity]
samples/cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp
samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
samples/cpp/tutorial_code/ImgTrans/filter2D_demo.cpp
samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp [new file with mode: 0644]
samples/cpp/tutorial_code/ImgTrans/houghlines.cpp [new file with mode: 0644]
samples/cpp/tutorial_code/viz/histo3D.cpp [new file with mode: 0644]
samples/data/ellipses.jpg [new file with mode: 0644]
samples/data/smarties.png [new file with mode: 0644]
samples/dnn/caffe_googlenet.cpp
samples/dnn/colorization.py [new file with mode: 0644]
samples/dnn/fcn_semsegm.cpp
samples/dnn/googlenet_python.py
samples/dnn/mobilenet_ssd_accuracy.py [new file with mode: 0644]
samples/dnn/mobilenet_ssd_python.py
samples/dnn/resnet_ssd_face_python.py
samples/dnn/shrink_tf_graph_weights.py [new file with mode: 0644]
samples/dnn/ssd_mobilenet_object_detection.cpp
samples/dnn/ssd_object_detection.cpp
samples/java/tutorial_code/ImgProc/HitMiss/HitMiss.java [new file with mode: 0644]
samples/java/tutorial_code/ImgProc/Pyramids/Pyramids.java [new file with mode: 0644]
samples/java/tutorial_code/ImgProc/Smoothing/Smoothing.java [new file with mode: 0644]
samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java [new file with mode: 0644]
samples/java/tutorial_code/ImgTrans/Filter2D/Filter2D_Demo.java [new file with mode: 0644]
samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java [new file with mode: 0644]
samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java [new file with mode: 0644]
samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java [new file with mode: 0644]
samples/java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java [new file with mode: 0644]
samples/java/tutorial_code/ImgTrans/SobelDemo/SobelDemo.java [new file with mode: 0644]
samples/python/plane_tracker.py
samples/python/tutorial_code/ImgTrans/Filter2D/filter2D.py [new file with mode: 0644]
samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py [new file with mode: 0644]
samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py [new file with mode: 0644]
samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py [new file with mode: 0644]
samples/python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py [new file with mode: 0644]
samples/python/tutorial_code/ImgTrans/SobelDemo/sobel_demo.py [new file with mode: 0644]
samples/python/tutorial_code/imgProc/HitMiss/hit_miss.py [new file with mode: 0644]
samples/python/tutorial_code/imgProc/Pyramids/pyramids.py [new file with mode: 0644]
samples/python/tutorial_code/imgProc/Smoothing/smoothing.py [new file with mode: 0644]
samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py [new file with mode: 0644]

diff --git a/3rdparty/ffmpeg/ffmpeg.cmake b/3rdparty/ffmpeg/ffmpeg.cmake
index 4f14063..1412997 100644
@@ -1,8 +1,8 @@
-# Binary branch name: ffmpeg/master_20170704
-# Binaries were created for OpenCV: f670a9927026629a4083e05a1612f0adcad7727e
-set(FFMPEG_BINARIES_COMMIT "a86e53eb35737a50e5100e26af3aa1d29e810890")
-set(FFMPEG_FILE_HASH_BIN32 "79c35cc654778e66237444bc562afbca")
-set(FFMPEG_FILE_HASH_BIN64 "0dc72775ec3c14d1e049f51dc1280dbb")
+# Binary branch name: ffmpeg/master_20171009
+# Binaries were created for OpenCV: 8ac2c5d620b467d3f22802e96c88ddde6da707af
+set(FFMPEG_BINARIES_COMMIT "66b1fed06cf3510235f367f96aa26da5cb234a15")
+set(FFMPEG_FILE_HASH_BIN32 "3ae76b105113d944984b2351c61e21c6")
+set(FFMPEG_FILE_HASH_BIN64 "cf3bb5bc9d393b022ea7a42eb63e794d")
 set(FFMPEG_FILE_HASH_CMAKE "ec59008da403fb18ab3c1ed66aed583b")
 
 function(download_win_ffmpeg script_var)
diff --git a/3rdparty/libjpeg/CMakeLists.txt b/3rdparty/libjpeg/CMakeLists.txt
index 969cd99..b323255 100644
@@ -38,7 +38,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
   set_source_files_properties(jcdctmgr.c PROPERTIES COMPILE_FLAGS "-O1")
 endif()
 
-ocv_warnings_disable(CMAKE_C_FLAGS -Wcast-align -Wshadow -Wunused)
+ocv_warnings_disable(CMAKE_C_FLAGS -Wcast-align -Wshadow -Wunused -Wshift-negative-value)
 ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang
 ocv_warnings_disable(CMAKE_C_FLAGS /wd4013 /wd4244 /wd4267) # vs2005
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2d288af..8e9bd0b 100644
@@ -255,6 +255,7 @@ OCV_OPTION(WITH_ITT            "Include Intel ITT support"                   ON
 # ===================================================
 OCV_OPTION(BUILD_SHARED_LIBS        "Build shared libraries (.dll/.so) instead of static ones (.lib/.a)" NOT (ANDROID OR APPLE_FRAMEWORK) )
 OCV_OPTION(BUILD_opencv_apps        "Build utility applications (used for example to train classifiers)" (NOT ANDROID AND NOT WINRT) IF (NOT APPLE_FRAMEWORK) )
+OCV_OPTION(BUILD_opencv_js          "Build JavaScript bindings by Emscripten" OFF )
 OCV_OPTION(BUILD_ANDROID_EXAMPLES   "Build examples for Android platform"         ON  IF ANDROID )
 OCV_OPTION(BUILD_DOCS               "Create build rules for OpenCV Documentation" ON  IF (NOT WINRT OR APPLE_FRAMEWORK))
 OCV_OPTION(BUILD_EXAMPLES           "Build all examples"                          OFF )
@@ -297,6 +298,7 @@ OCV_OPTION(ENABLE_PROFILING           "Enable profiling in the GCC compiler (Add
 OCV_OPTION(ENABLE_COVERAGE            "Enable coverage collection with  GCov"                    OFF  IF CMAKE_COMPILER_IS_GNUCXX )
 OCV_OPTION(ENABLE_OMIT_FRAME_POINTER  "Enable -fomit-frame-pointer for GCC"                      ON   IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
 OCV_OPTION(ENABLE_POWERPC             "Enable PowerPC for GCC"                                   ON   IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
+OCV_OPTION(ENABLE_VSX                 "Enable POWER8 and above VSX (64-bit little-endian)"       ON   IF (CMAKE_COMPILER_IS_GNUCXX AND PPC64LE) )
 OCV_OPTION(ENABLE_FAST_MATH           "Enable -ffast-math (not recommended for GCC 4.6.x)"       OFF  IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_NEON                "Enable NEON instructions"                                 (NEON OR ANDROID_ARM_NEON OR AARCH64) IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
 OCV_OPTION(ENABLE_VFPV3               "Enable VFPv3-D32 instructions"                            OFF  IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake
index 45a536b..fea13ce 100644
@@ -28,6 +28,7 @@
 
 set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512
 list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
+list(APPEND CPU_ALL_OPTIMIZATIONS VSX)
 list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
 
 ocv_update(CPU_VFPV3_FEATURE_ALIAS "")
@@ -79,6 +80,7 @@ ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON)
 ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF)
 ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF)
 
+ocv_optimization_process_obsolete_option(ENABLE_VSX VSX OFF)
 
 macro(ocv_is_optimization_in_list resultvar check_opt)
   set(__checked "")
@@ -266,6 +268,15 @@ elseif(ARM OR AARCH64)
     ocv_update(CPU_FP16_IMPLIES "NEON")
     set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}")
   endif()
+elseif(PPC64LE)
+  ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX")
+  ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp")
+
+  if(CMAKE_COMPILER_IS_CLANGCXX AND (NOT ${CMAKE_CXX_COMPILER} MATCHES "xlc"))
+    ocv_update(CPU_VSX_FLAGS_ON "-mvsx -maltivec")
+  else()
+    ocv_update(CPU_VSX_FLAGS_ON "-mcpu=power8")
+  endif()
 endif()
 
 # Helper values for cmake-gui
diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake
index 0a7ff09..c422109 100644
@@ -73,7 +73,7 @@ if(CUDA_FOUND)
   elseif(CUDA_GENERATION STREQUAL "Volta")
     set(__cuda_arch_bin "7.0")
   elseif(CUDA_GENERATION STREQUAL "Auto")
-    execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
+    execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
                      WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
                      RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
                      ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
@@ -90,7 +90,7 @@ if(CUDA_FOUND)
       set(__cuda_arch_bin "3.2")
       set(__cuda_arch_ptx "")
     elseif(AARCH64)
-      execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
+      execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
                        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
                        RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
                        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
diff --git a/cmake/OpenCVDetectCXXCompiler.cmake b/cmake/OpenCVDetectCXXCompiler.cmake
index 7b0d4c3..cfb613b 100644
@@ -72,6 +72,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
   set(ARM 1)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
   set(AARCH64 1)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^ppc64le.*|PPC64LE.*")
+  set(PPC64LE 1)
 endif()
 
 # Workaround for 32-bit operating systems on 64-bit x86_64 processor
diff --git a/cmake/OpenCVDetectTBB.cmake b/cmake/OpenCVDetectTBB.cmake
index 426487a..9417a53 100644
@@ -45,7 +45,7 @@ function(ocv_tbb_env_guess _found)
   find_library(TBB_ENV_LIB NAMES "tbb")
   find_library(TBB_ENV_LIB_DEBUG NAMES "tbb_debug" PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH NO_DEFAULT_PATH)
   find_library(TBB_ENV_LIB_DEBUG NAMES "tbb_debug")
-  if (TBB_ENV_INCLUDE AND TBB_ENV_LIB)
+  if (TBB_ENV_INCLUDE AND (TBB_ENV_LIB OR TBB_ENV_LIB_DEBUG))
     ocv_tbb_verify()
     ocv_tbb_read_version("${TBB_ENV_INCLUDE}")
     add_library(tbb UNKNOWN IMPORTED)
diff --git a/cmake/OpenCVFindIPPIW.cmake b/cmake/OpenCVFindIPPIW.cmake
index 800b400..4e9308d 100644
@@ -137,7 +137,7 @@ if(BUILD_IPP_IW)
 
   # Package sources
   get_filename_component(__PATH "${IPPROOT}/../${IW_PACKAGE_SUBDIR}/" ABSOLUTE)
-  ippiw_setup("${_PATH}" 1)
+  ippiw_setup("${__PATH}" 1)
 endif()
 
 
diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake
index e555d1f..5682df6 100644
@@ -211,8 +211,8 @@ if(WITH_GDAL)
     find_package(GDAL QUIET)
 
     if(NOT GDAL_FOUND)
-        ocv_clear_vars(GDAL_LIBRARY GDAL_INCLUDE_DIR)
         set(HAVE_GDAL NO)
+        ocv_clear_vars(GDAL_VERSION GDAL_LIBRARIES)
     else()
         set(HAVE_GDAL YES)
         ocv_include_directories(${GDAL_INCLUDE_DIR})
diff --git a/cmake/OpenCVPackaging.cmake b/cmake/OpenCVPackaging.cmake
index 21a82a4..363e46b 100644
@@ -31,6 +31,9 @@ elseif(ARM)
 elseif(AARCH64)
   set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "arm64")
   set(CPACK_RPM_PACKAGE_ARCHITECTURE "aarch64")
+elseif(PPC64LE)
+  set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "ppc64el")
+  set(CPACK_RPM_PACKAGE_ARCHITECTURE "ppc64le")
 else()
   set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
   set(CPACK_RPM_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
@@ -164,4 +167,4 @@ endif(NOT OPENCV_CUSTOM_PACKAGE_INFO)
 
 include(CPack)
 
-ENDif(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
\ No newline at end of file
+ENDif(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
diff --git a/cmake/checks/cpu_vsx.cpp b/cmake/checks/cpu_vsx.cpp
new file mode 100644
index 0000000..6d74482
--- /dev/null
@@ -0,0 +1,12 @@
+# if defined(__VSX__)
+#   include <altivec.h>
+# else
+#   error "VSX is not supported"
+# endif
+
+int main()
+{
+    __vector float testF = vec_splats(0.f);
+    testF = vec_madd(testF, testF, testF);
+    return 0;
+}
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index 4243409..92266ab 100644
@@ -33,8 +33,9 @@ endif(HAVE_DOC_GENERATOR)
 
 if(BUILD_DOCS AND DOXYGEN_FOUND)
   # not documented modules list
-  list(APPEND blacklist "ts" "java" "python2" "python3" "world" "contrib_world")
+  list(APPEND blacklist "ts" "java" "python2" "python3" "js" "world" "contrib_world")
   unset(CMAKE_DOXYGEN_TUTORIAL_CONTRIB_ROOT)
+  unset(CMAKE_DOXYGEN_TUTORIAL_JS_ROOT)
 
   # gathering headers
   set(paths_include)
@@ -134,11 +135,13 @@ if(BUILD_DOCS AND DOXYGEN_FOUND)
   set(faqfile "${CMAKE_CURRENT_SOURCE_DIR}/faq.markdown")
   set(tutorial_path "${CMAKE_CURRENT_SOURCE_DIR}/tutorials")
   set(tutorial_py_path "${CMAKE_CURRENT_SOURCE_DIR}/py_tutorials")
+  set(CMAKE_DOXYGEN_TUTORIAL_JS_ROOT "- @ref tutorial_js_root")
+  set(tutorial_js_path "${CMAKE_CURRENT_SOURCE_DIR}/js_tutorials")
   set(example_path "${CMAKE_SOURCE_DIR}/samples")
 
   # set export variables
-  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${faqfile} ; ${paths_include} ; ${paths_hal_interface} ; ${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${paths_tutorial} ; ${tutorial_contrib_root}")
-  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${paths_tutorial}")
+  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${faqfile} ; ${paths_include} ; ${paths_hal_interface} ; ${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${tutorial_js_path} ; ${paths_tutorial} ; ${tutorial_contrib_root}")
+  string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${tutorial_js_path} ; ${paths_tutorial}")
   # TODO: remove paths_doc from EXAMPLE_PATH after face module tutorials/samples moved to separate folders
   string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXAMPLE_PATH  "${example_path} ; ${paths_doc} ; ${paths_sample}")
   set(CMAKE_DOXYGEN_LAYOUT "${CMAKE_CURRENT_SOURCE_DIR}/DoxygenLayout.xml")
@@ -163,9 +166,46 @@ if(BUILD_DOCS AND DOXYGEN_FOUND)
   configure_file(Doxyfile.in ${doxyfile} @ONLY)
   configure_file(root.markdown.in ${rootfile} @ONLY)
 
+  # js tutorial assets
+  set(opencv_tutorial_html_dir "${CMAKE_CURRENT_BINARY_DIR}/doxygen/html")
+  set(js_tutorials_assets_dir "${CMAKE_CURRENT_SOURCE_DIR}/js_tutorials/js_assets")
+  set(js_tutorials_assets_deps "")
+
+  # make sure the build directory exists
+  file(MAKE_DIRECTORY "${opencv_tutorial_html_dir}")
+
+  # gather and copy specific files for js tutorials
+  file(GLOB_RECURSE js_assets "${js_tutorials_assets_dir}/*")
+  ocv_list_filterout(js_assets "\\\\.eslintrc.json")
+  list(APPEND js_assets "${OpenCV_SOURCE_DIR}/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/Data/box.mp4")
+
+  if(BUILD_opencv_js)
+    set(ocv_js_dir "${CMAKE_BINARY_DIR}/bin")
+    set(ocv_js "opencv.js")
+    list(APPEND js_assets "${ocv_js_dir}/${ocv_js}")
+  elseif(DEFINED OPENCV_JS_LOCATION)
+    list(APPEND js_assets "${OPENCV_JS_LOCATION}")
+  endif()
+
+  # copy haar cascade files
+  set(haar_cascade_files "")
+  set(data_harrcascades_path "${OpenCV_SOURCE_DIR}/data/haarcascades/")
+  list(APPEND js_tutorials_assets_deps "${data_harrcascades_path}/haarcascade_frontalface_default.xml" "${data_harrcascades_path}/haarcascade_eye.xml")
+  list(APPEND js_assets "${data_harrcascades_path}/haarcascade_frontalface_default.xml" "${data_harrcascades_path}/haarcascade_eye.xml")
+
+  foreach(f ${js_assets})
+    get_filename_component(fname "${f}" NAME)
+    add_custom_command(OUTPUT "${opencv_tutorial_html_dir}/${fname}"
+                       COMMAND ${CMAKE_COMMAND} -E copy_if_different "${f}" "${opencv_tutorial_html_dir}/${fname}"
+                       DEPENDS "${f}"
+                       COMMENT "Copying ${fname}"
+    )
+    list(APPEND js_tutorials_assets_deps "${f}" "${opencv_tutorial_html_dir}/${fname}")
+  endforeach()
+
   add_custom_target(doxygen
     COMMAND ${DOXYGEN_EXECUTABLE} ${doxyfile}
-    DEPENDS ${doxyfile} ${rootfile} ${bibfile} ${deps}
+    DEPENDS ${doxyfile} ${rootfile} ${bibfile} ${deps} ${js_tutorials_assets_deps}
   )
   install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/doxygen/html
     DESTINATION "${OPENCV_DOC_INSTALL_PATH}"
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 1db5427..3822554 100644
@@ -174,7 +174,7 @@ FORMULA_FONTSIZE       = 14
 FORMULA_TRANSPARENT    = YES
 USE_MATHJAX            = YES
 MATHJAX_FORMAT         = HTML-CSS
-MATHJAX_RELPATH        = http://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0
+MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0
 MATHJAX_EXTENSIONS     = TeX/AMSmath TeX/AMSsymbols
 MATHJAX_CODEFILE       = @CMAKE_CURRENT_SOURCE_DIR@/mymath.js
 SEARCHENGINE           = YES
@@ -255,7 +255,7 @@ PREDEFINED             = __cplusplus=1 \
 EXPAND_AS_DEFINED      =
 SKIP_FUNCTION_MACROS   = YES
 TAGFILES               =
-GENERATE_TAGFILE       =
+GENERATE_TAGFILE       = @CMAKE_DOXYGEN_OUTPUT_PATH@/html/opencv.tag
 ALLEXTERNALS           = NO
 EXTERNAL_GROUPS        = YES
 EXTERNAL_PAGES         = YES
diff --git a/doc/js_tutorials/js_assets/.eslintrc.json b/doc/js_tutorials/js_assets/.eslintrc.json
new file mode 100644
index 0000000..abe9037
--- /dev/null
@@ -0,0 +1,22 @@
+{
+  "extends": "google",
+  "parserOptions": {
+    "ecmaVersion": 6
+  },
+  "rules": {
+    "max-len": ["error", 100, {"ignoreUrls": true}],
+    "quotes": ["error", "single"],
+    "indent": ["error", 4, {"ArrayExpression": "first",
+                            "ObjectExpression": "first",
+                            "CallExpression": {"arguments": "first"},
+                            "SwitchCase": 1}],
+    "require-jsdoc": "off",
+    "new-cap": "off"
+  },
+  "plugins": ["html"],
+  "settings": {
+    "html/javascript-mime-types": ["text/javascript", "text/code-snippet"],
+    "html/indent": "0",
+    "html/report-bad-indent": "error"
+  }
+}
diff --git a/doc/js_tutorials/js_assets/apple.jpg b/doc/js_tutorials/js_assets/apple.jpg
new file mode 100644
index 0000000..a00252b
Binary files /dev/null and b/doc/js_tutorials/js_assets/apple.jpg differ
diff --git a/doc/js_tutorials/js_assets/coins.jpg b/doc/js_tutorials/js_assets/coins.jpg
new file mode 100644
index 0000000..bf55f35
Binary files /dev/null and b/doc/js_tutorials/js_assets/coins.jpg differ
diff --git a/doc/js_tutorials/js_assets/cup.mp4 b/doc/js_tutorials/js_assets/cup.mp4
new file mode 100644
index 0000000..fbe79aa
Binary files /dev/null and b/doc/js_tutorials/js_assets/cup.mp4 differ
diff --git a/doc/js_tutorials/js_assets/handDst.jpg b/doc/js_tutorials/js_assets/handDst.jpg
new file mode 100644
index 0000000..5ed148a
Binary files /dev/null and b/doc/js_tutorials/js_assets/handDst.jpg differ
diff --git a/doc/js_tutorials/js_assets/handSrc.jpg b/doc/js_tutorials/js_assets/handSrc.jpg
new file mode 100644
index 0000000..083ab56
Binary files /dev/null and b/doc/js_tutorials/js_assets/handSrc.jpg differ
diff --git a/doc/js_tutorials/js_assets/js_basic_ops_copymakeborder.html b/doc/js_tutorials/js_assets/js_basic_ops_copymakeborder.html
new file mode 100644
index 0000000..126b131
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Padding Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Padding Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// You can try more different parameters
+let s = new cv.Scalar(255, 0, 0, 255);
+cv.copyMakeBorder(src, dst, 10, 10, 10, 10, cv.BORDER_CONSTANT, s);
+cv.imshow('canvasOutput', dst);
+src.delete();
+dst.delete();
+</script>
+<script>
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
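Each asset page above follows the same harness: utils.loadOpenCv() fetches opencv.js and enables the Try it button once the library is usable. For a standalone page outside this harness, a plain script tag with an onload hook is enough; a minimal sketch (the element ids and the opencv.js location are illustrative, not fixed by the tutorials):

    <script async src="opencv.js" onload="onOpenCvReady();" type="text/javascript"></script>
    <script type="text/javascript">
    function onOpenCvReady() {
        // cv.* is usable from here on
        let src = cv.imread('canvasInput');   // read a <canvas> element by id
        cv.imshow('canvasOutput', src);       // draw the Mat onto another <canvas>
        src.delete();                         // opencv.js Mats are freed manually
    }
    </script>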
diff --git a/doc/js_tutorials/js_assets/js_basic_ops_roi.html b/doc/js_tutorials/js_assets/js_basic_ops_roi.html
new file mode 100644
index 0000000..688c23d
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image ROI Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image ROI Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// You can try more different parameters
+let rect = new cv.Rect(100, 100, 200, 200);
+dst = src.roi(rect);
+cv.imshow('canvasOutput', dst);
+src.delete();
+dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
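One detail worth noting in the snippet above: as in C++, src.roi(rect) returns a Mat header that shares pixel data with src rather than copying it (the freshly constructed dst is simply overwritten by the assignment). When an independent copy is needed, clone() performs the deep copy; a short sketch under the same setup:

    let src = cv.imread('canvasInput');
    let view = src.roi(new cv.Rect(100, 100, 200, 200)); // shares src's pixel data
    let copy = view.clone();                             // independent deep copy
    src.delete(); view.delete();                         // copy still owns its data
    cv.imshow('canvasOutput', copy);
    copy.delete();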
diff --git a/doc/js_tutorials/js_assets/js_bg_subtraction.html b/doc/js_tutorials/js_assets/js_bg_subtraction.html
new file mode 100644
index 0000000..79f860c
--- /dev/null
@@ -0,0 +1,126 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Background Subtraction Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Background Subtraction Example</h2>
+<p>
+    Click <b>Start/Stop</b> button to start or stop the camera capture.<br>
+    The <b>videoInput</b> is a &lt;video&gt; element used as input.
+    The <b>canvasOutput</b> is a &lt;canvas&gt; element used as output.<br>
+    The code of &lt;textarea&gt; will be executed when video is started.
+    You can modify the code to investigate more.
+</p>
+<div>
+<div class="control"><button id="startAndStop" disabled>Start</button></div>
+<textarea class="code" rows="29" cols="80" id="codeEditor" spellcheck="false">
+</textarea>
+</div>
+<p class="err" id="errorMessage"></p>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <video id="videoInput" width="320" height="240" muted loop></video>
+        </td>
+        <td>
+            <canvas id="canvasOutput" width="320" height="240"></canvas>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">videoInput</div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    </table>
+</div>
+<script src="https://webrtc.github.io/adapter/adapter-5.0.4.js" type="text/javascript"></script>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let video = document.getElementById('videoInput');
+let cap = new cv.VideoCapture(video);
+
+let frame = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+let fgmask = new cv.Mat(video.height, video.width, cv.CV_8UC1);
+let fgbg = new cv.BackgroundSubtractorMOG2(500, 16, true);
+
+const FPS = 30;
+function processVideo() {
+    try {
+        if (!streaming) {
+            // clean and stop.
+            frame.delete(); fgmask.delete(); fgbg.delete();
+            return;
+        }
+        let begin = Date.now();
+        // start processing.
+        cap.read(frame);
+        fgbg.apply(frame, fgmask);
+        cv.imshow('canvasOutput', fgmask);
+        // schedule the next one.
+        let delay = 1000/FPS - (Date.now() - begin);
+        setTimeout(processVideo, delay);
+    } catch (err) {
+        utils.printError(err);
+    }
+};
+
+// schedule the first one.
+setTimeout(processVideo, 0);
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+
+let streaming = false;
+let videoInput = document.getElementById('videoInput');
+let startAndStop = document.getElementById('startAndStop');
+let canvasOutput = document.getElementById('canvasOutput');
+let canvasContext = canvasOutput.getContext('2d');
+
+startAndStop.addEventListener('click', () => {
+    if (!streaming) {
+        utils.clearError();
+        videoInput.play().then(() => {
+            onVideoStarted();
+        });
+    } else {
+        videoInput.pause();
+        videoInput.currentTime = 0;
+        onVideoStopped();
+    }
+});
+
+function onVideoStarted() {
+    streaming = true;
+    startAndStop.innerText = 'Stop';
+    videoInput.height = videoInput.width * (videoInput.videoHeight / videoInput.videoWidth);
+    utils.executeCode('codeEditor');
+}
+
+function onVideoStopped() {
+    streaming = false;
+    canvasContext.clearRect(0, 0, canvasOutput.width, canvasOutput.height);
+    startAndStop.innerText = 'Start';
+}
+
+utils.loadOpenCv(() => {
+    videoInput.addEventListener('canplay', () => {
+        startAndStop.removeAttribute('disabled');
+    });
+    videoInput.src = 'box.mp4';
+});
+</script>
+</body>
+</html>
\ No newline at end of file
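The setTimeout(processVideo, delay) pattern above targets a fixed 30 FPS and subtracts the per-frame processing time from the delay. When exact pacing is not required, tying the loop to the browser's repaint cycle is a common alternative; a sketch of the same loop body (same variables as in the snippet above) driven by requestAnimationFrame:

    function processVideo() {
        if (!streaming) {
            frame.delete(); fgmask.delete(); fgbg.delete();
            return;
        }
        cap.read(frame);
        fgbg.apply(frame, fgmask);
        cv.imshow('canvasOutput', fgmask);
        requestAnimationFrame(processVideo); // runs before the next repaint
    }
    requestAnimationFrame(processVideo);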
diff --git a/doc/js_tutorials/js_assets/js_camshift.html b/doc/js_tutorials/js_assets/js_camshift.html
new file mode 100644
index 0000000..046ab20
--- /dev/null
@@ -0,0 +1,172 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>CamShift Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>CamShift Example</h2>
+<p>
+    Click <b>Start/Stop</b> button to start or stop the video.<br>
+    The <b>videoInput</b> is a &lt;video&gt; element used as CamShift input.
+    The <b>canvasOutput</b> is a &lt;canvas&gt; element used as CamShift output.<br>
+    The code of &lt;textarea&gt; will be executed when video is started.
+    You can modify the code to investigate more.
+</p>
+<div>
+<div class="control"><button id="startAndStop" disabled>Start</button></div>
+<textarea class="code" rows="29" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+</div>
+<p class="err" id="errorMessage"></p>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <video id="videoInput" width="320" height="240" muted loop></video>
+        </td>
+        <td>
+            <canvas id="canvasOutput" width="320" height="240"></canvas>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">videoInput</div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    </table>
+</div>
+<script src="https://webrtc.github.io/adapter/adapter-5.0.4.js" type="text/javascript"></script>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let video = document.getElementById('videoInput');
+let cap = new cv.VideoCapture(video);
+
+// take first frame of the video
+let frame = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+cap.read(frame);
+
+// hardcode the initial location of window
+let trackWindow = new cv.Rect(150, 60, 63, 125);
+
+// set up the ROI for tracking
+let roi = frame.roi(trackWindow);
+let hsvRoi = new cv.Mat();
+cv.cvtColor(roi, hsvRoi, cv.COLOR_RGBA2RGB);
+cv.cvtColor(hsvRoi, hsvRoi, cv.COLOR_RGB2HSV);
+let mask = new cv.Mat();
+let lowScalar = new cv.Scalar(30, 30, 0);
+let highScalar = new cv.Scalar(180, 180, 180);
+let low = new cv.Mat(hsvRoi.rows, hsvRoi.cols, hsvRoi.type(), lowScalar);
+let high = new cv.Mat(hsvRoi.rows, hsvRoi.cols, hsvRoi.type(), highScalar);
+cv.inRange(hsvRoi, low, high, mask);
+let roiHist = new cv.Mat();
+let hsvRoiVec = new cv.MatVector();
+hsvRoiVec.push_back(hsvRoi);
+cv.calcHist(hsvRoiVec, [0], mask, roiHist, [180], [0, 180]);
+cv.normalize(roiHist, roiHist, 0, 255, cv.NORM_MINMAX);
+
+// delete useless mats.
+roi.delete(); hsvRoi.delete(); mask.delete(); low.delete(); high.delete(); hsvRoiVec.delete();
+
+// Setup the termination criteria, either 10 iteration or move by atleast 1 pt
+let termCrit = new cv.TermCriteria(cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1);
+
+let hsv = new cv.Mat(video.height, video.width, cv.CV_8UC3);
+let hsvVec = new cv.MatVector();
+hsvVec.push_back(hsv);
+let dst = new cv.Mat();
+let trackBox = null;
+
+const FPS = 30;
+function processVideo() {
+    try {
+        if (!streaming) {
+            // clean and stop.
+            frame.delete(); dst.delete(); hsvVec.delete(); roiHist.delete(); hsv.delete();
+            return;
+        }
+        let begin = Date.now();
+
+        // start processing.
+        cap.read(frame);
+        cv.cvtColor(frame, hsv, cv.COLOR_RGBA2RGB);
+        cv.cvtColor(hsv, hsv, cv.COLOR_RGB2HSV);
+        cv.calcBackProject(hsvVec, [0], roiHist, dst, [0, 180], 1);
+
+        // apply camshift to get the new location
+        [trackBox, trackWindow] = cv.CamShift(dst, trackWindow, termCrit);
+
+        // Draw it on image
+        let pts = cv.rotatedRectPoints(trackBox);
+        cv.line(frame, pts[0], pts[1], [255, 0, 0, 255], 3);
+        cv.line(frame, pts[1], pts[2], [255, 0, 0, 255], 3);
+        cv.line(frame, pts[2], pts[3], [255, 0, 0, 255], 3);
+        cv.line(frame, pts[3], pts[0], [255, 0, 0, 255], 3);
+        cv.imshow('canvasOutput', frame);
+
+        // schedule the next one.
+        let delay = 1000/FPS - (Date.now() - begin);
+        setTimeout(processVideo, delay);
+    } catch (err) {
+        utils.printError(err);
+    }
+};
+
+// schedule the first one.
+setTimeout(processVideo, 0);
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+
+let streaming = false;
+let videoInput = document.getElementById('videoInput');
+let startAndStop = document.getElementById('startAndStop');
+let canvasOutput = document.getElementById('canvasOutput');
+let canvasContext = canvasOutput.getContext('2d');
+
+startAndStop.addEventListener('click', () => {
+    if (!streaming) {
+        utils.clearError();
+        videoInput.play().then(() => {
+            onVideoStarted();
+        });
+    } else {
+        videoInput.pause();
+        videoInput.currentTime = 0;
+        onVideoStopped();
+    }
+});
+
+function onVideoStarted() {
+    streaming = true;
+    startAndStop.innerText = 'Stop';
+    videoInput.height = videoInput.width * (videoInput.videoHeight / videoInput.videoWidth);
+    utils.executeCode('codeEditor');
+}
+
+function onVideoStopped() {
+    streaming = false;
+    canvasContext.clearRect(0, 0, canvasOutput.width, canvasOutput.height);
+    startAndStop.innerText = 'Start';
+}
+
+utils.loadOpenCv(() => {
+    videoInput.addEventListener('canplay', () => {
+        startAndStop.removeAttribute('disabled');
+    });
+    videoInput.src = 'cup.mp4';
+});
+</script>
+</body>
+</html>
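cv.CamShift returns a pair: the adapted rotated box drawn above via cv.rotatedRectPoints, plus the axis-aligned window fed back into the next iteration. The fixed-window variant cv.meanShift has the same call shape; swapping it into the loop, as the companion js_meanshift tutorial does, looks roughly like this:

    // meanShift keeps the window size fixed; only its position is updated
    [, trackWindow] = cv.meanShift(dst, trackWindow, termCrit);
    let p1 = new cv.Point(trackWindow.x, trackWindow.y);
    let p2 = new cv.Point(trackWindow.x + trackWindow.width,
                          trackWindow.y + trackWindow.height);
    cv.rectangle(frame, p1, p2, [255, 0, 0, 255], 2);
    cv.imshow('canvasOutput', frame);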
diff --git a/doc/js_tutorials/js_assets/js_canny.html b/doc/js_tutorials/js_assets/js_canny.html
new file mode 100644
index 0000000..48264a0
--- /dev/null
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Canny Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Canny Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+// You can try more different parameters
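+// Canny(src, dst, threshold1, threshold2, apertureSize, L2gradient):
+// 50/100 are the hysteresis thresholds, 3 is the Sobel aperture size,
+// and false selects the faster L1 gradient norm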
+cv.Canny(src, dst, 50, 100, 3, false);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_colorspaces_cvtColor.html b/doc/js_tutorials/js_assets/js_colorspaces_cvtColor.html
new file mode 100644 (file)
index 0000000..d6c79a0
--- /dev/null
@@ -0,0 +1,67 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Convert Color Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Convert Color Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// You can try more different parameters
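+// images read from a canvas are RGBA, so conversions start from an RGBA code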
+cv.cvtColor(src, dst, cv.COLOR_RGBA2GRAY, 0);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_colorspaces_inRange.html b/doc/js_tutorials/js_assets/js_colorspaces_inRange.html
new file mode 100644 (file)
index 0000000..44fd749
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image InRange Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image InRange Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let low = new cv.Mat(src.rows, src.cols, src.type(), [0, 0, 0, 0]);
+let high = new cv.Mat(src.rows, src.cols, src.type(), [150, 150, 150, 255]);
+// You can try more different parameters
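+// inRange sets the 8-bit mask to 255 wherever low <= src <= high holds in every channel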
+cv.inRange(src, low, high, dst);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); low.delete(); high.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_approxPolyDP.html b/doc/js_tutorials/js_assets/js_contour_features_approxPolyDP.html
new file mode 100644 (file)
index 0000000..99bd429
--- /dev/null
@@ -0,0 +1,86 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image ApproxPolyDP Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image ApproxPolyDP Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 100, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+let poly = new cv.MatVector();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+// approximate each contour with a polygon
+for (let i = 0; i < contours.size(); ++i) {
+    let tmp = new cv.Mat();
+    let cnt = contours.get(i);
+    // You can try more different parameters
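+    // approxPolyDP(curve, out, epsilon, closed): epsilon is the maximum pixel
+    // distance from the original contour; true closes the polygon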
+    cv.approxPolyDP(cnt, tmp, 3, true);
+    poly.push_back(tmp);
+    cnt.delete(); tmp.delete();
+}
+// draw each contour with a random color
+for (let i = 0; i < contours.size(); ++i) {
+    let color = new cv.Scalar(Math.round(Math.random() * 255), Math.round(Math.random() * 255),
+                              Math.round(Math.random() * 255));
+    cv.drawContours(dst, poly, i, color, 1, 8, hierarchy, 0);
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); hierarchy.delete(); contours.delete(); poly.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_area.html b/doc/js_tutorials/js_assets/js_contour_features_area.html
new file mode 100644 (file)
index 0000000..5f447fe
--- /dev/null
@@ -0,0 +1,63 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Area Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Area Example</h2>
+<p>
+    A &lt;canvas&gt; element named <b>canvasInput</b> has been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <div>
+        <canvas id="canvasInput"></canvas>
+        <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+    </div>
+    <div>
+        <p><strong>The area is: </strong><span id="areaOutput"></span></p>
+    </div>
+
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 177, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let cnt = contours.get(20);
+// You can try more different parameters
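+// contourArea(contour, oriented): false returns the absolute (unsigned) area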
+let area = cv.contourArea(cnt, false);
+areaOutput.innerHTML = area;
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete(); cnt.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_boundingRect.html b/doc/js_tutorials/js_assets/js_contour_features_boundingRect.html
new file mode 100644 (file)
index 0000000..a0586da
--- /dev/null
@@ -0,0 +1,79 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Bounding Rect Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Bounding Rect Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 177, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let cnt = contours.get(0);
+// You can try more different parameters
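+// boundingRect returns the upright bounding rectangle as {x, y, width, height}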
+let rect = cv.boundingRect(cnt);
+let contoursColor = new cv.Scalar(255, 255, 255);
+let rectangleColor = new cv.Scalar(255, 0, 0);
+cv.drawContours(dst, contours, 0, contoursColor, 1, 8, hierarchy, 100);
+let point1 = new cv.Point(rect.x, rect.y);
+let point2 = new cv.Point(rect.x + rect.width, rect.y + rect.height);
+cv.rectangle(dst, point1, point2, rectangleColor, 2, cv.LINE_AA, 0);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete(); cnt.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_convexHull.html b/doc/js_tutorials/js_assets/js_contour_features_convexHull.html
new file mode 100644 (file)
index 0000000..1e4fae4
--- /dev/null
@@ -0,0 +1,86 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Convex Hull Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Convex Hull Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 100, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+let hull = new cv.MatVector();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+// compute the convex hull of each contour
+for (let i = 0; i < contours.size(); ++i) {
+    let tmp = new cv.Mat();
+    let cnt = contours.get(i);
+    // You can try more different parameters
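+    // convexHull(points, hull, clockwise, returnPoints): false keeps the
+    // default orientation, true returns hull points rather than indices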
+    cv.convexHull(cnt, tmp, false, true);
+    hull.push_back(tmp);
+    cnt.delete(); tmp.delete();
+}
+// draw each hull with a random color
+for (let i = 0; i < contours.size(); ++i) {
+    let colorHull = new cv.Scalar(Math.round(Math.random() * 255), Math.round(Math.random() * 255),
+                                  Math.round(Math.random() * 255));
+    cv.drawContours(dst, hull, i, colorHull, 1, 8, hierarchy, 0);
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); hierarchy.delete(); contours.delete(); hull.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_fitEllipse.html b/doc/js_tutorials/js_assets/js_contour_features_fitEllipse.html
new file mode 100644 (file)
index 0000000..7ae1e48
--- /dev/null
@@ -0,0 +1,77 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Fit Ellipse Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Fit Ellipse Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 177, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let cnt = contours.get(0);
+// You can try more different parameters
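+// fitEllipse needs at least 5 points and returns a rotated rect ({center, size, angle})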
+let rotatedRect = cv.fitEllipse(cnt);
+let contoursColor = new cv.Scalar(255, 255, 255);
+let ellipseColor = new cv.Scalar(255, 0, 0);
+cv.drawContours(dst, contours, 0, contoursColor, 1, 8, hierarchy, 100);
+cv.ellipse1(dst, rotatedRect, ellipseColor, 1, cv.LINE_8);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete(); cnt.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_fitLine.html b/doc/js_tutorials/js_assets/js_contour_features_fitLine.html
new file mode 100644 (file)
index 0000000..f339819
--- /dev/null
@@ -0,0 +1,86 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Fit Line Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Fit Line Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 177, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+let line = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let cnt = contours.get(0);
+// You can try more different parameters
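+// fitLine(points, line, distType, param, reps, aeps) writes a 4-element 32F Mat:
+// a unit direction (vx, vy) and a point (x0, y0) on the line; param = 0 picks
+// an optimal value for the chosen distance type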
+cv.fitLine(cnt, line, cv.DIST_L2, 0, 0.01, 0.01);
+let contoursColor = new cv.Scalar(255, 255, 255);
+let lineColor = new cv.Scalar(255, 0, 0);
+cv.drawContours(dst, contours, 0, contoursColor, 1, 8, hierarchy, 100);
+let vx = line.data32F[0];
+let vy = line.data32F[1];
+let x = line.data32F[2];
+let y = line.data32F[3];
+let lefty = Math.round((-x * vy / vx) + y);
+let righty = Math.round(((src.cols - x) * vy / vx) + y);
+let point1 = new cv.Point(src.cols - 1, righty);
+let point2 = new cv.Point(0, lefty);
+cv.line(dst, point1, point2, lineColor, 2, cv.LINE_AA, 0);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete(); line.delete(); cnt.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_minAreaRect.html b/doc/js_tutorials/js_assets/js_contour_features_minAreaRect.html
new file mode 100644 (file)
index 0000000..990be37
--- /dev/null
@@ -0,0 +1,81 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Min Area Rect Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Min Area Rect Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 177, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let cnt = contours.get(0);
+// You can try more different parameters
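+// minAreaRect returns a rotated rect; RotatedRect.points recovers its 4 vertices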
+let rotatedRect = cv.minAreaRect(cnt);
+let vertices = cv.RotatedRect.points(rotatedRect);
+let contoursColor = new cv.Scalar(255, 255, 255);
+let rectangleColor = new cv.Scalar(255, 0, 0);
+cv.drawContours(dst, contours, 0, contoursColor, 1, 8, hierarchy, 100);
+// draw rotatedRect
+for (let i = 0; i < 4; i++) {
+    cv.line(dst, vertices[i], vertices[(i + 1) % 4], rectangleColor, 2, cv.LINE_AA, 0);
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete(); cnt.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_minEnclosingCircle.html b/doc/js_tutorials/js_assets/js_contour_features_minEnclosingCircle.html
new file mode 100644 (file)
index 0000000..77b6d7a
--- /dev/null
@@ -0,0 +1,77 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Min Enclosing Circle Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Min Enclosing Circle Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 177, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let cnt = contours.get(0);
+// You can try more different parameters
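+// minEnclosingCircle returns {center, radius} of the smallest circle containing the contour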
+let circle = cv.minEnclosingCircle(cnt);
+let contoursColor = new cv.Scalar(255, 255, 255);
+let circleColor = new cv.Scalar(255, 0, 0);
+cv.drawContours(dst, contours, 0, contoursColor, 1, 8, hierarchy, 100);
+cv.circle(dst, circle.center, circle.radius, circleColor);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete(); cnt.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_moments.html b/doc/js_tutorials/js_assets/js_contour_features_moments.html
new file mode 100644 (file)
index 0000000..a8fee29
--- /dev/null
@@ -0,0 +1,62 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Moments Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Moments Example</h2>
+<p>
+    A &lt;canvas&gt; element named <b>canvasInput</b> has been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <div>
+        <canvas id="canvasInput"></canvas>
+        <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+    </div>
+    <div>
+        <p><strong>The m00 is: </strong><span id="momentsOutput"></span></p>
+    </div>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 177, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let cnt = contours.get(0);
+// You can try more different parameters
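+// moments(array, binaryImage): for a contour, m00 equals its area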
+let Moments = cv.moments(cnt, false);
+momentsOutput.innerHTML = Moments.m00;
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete(); cnt.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_features_perimeter.html b/doc/js_tutorials/js_assets/js_contour_features_perimeter.html
new file mode 100644 (file)
index 0000000..b94ca18
--- /dev/null
@@ -0,0 +1,62 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Perimeter Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Perimeter Example</h2>
+<p>
+    A &lt;canvas&gt; element named <b>canvasInput</b> has been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <div>
+        <canvas id="canvasInput"></canvas>
+        <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+    </div>
+    <div>
+        <p><strong>The perimeter is: </strong><span id="perimeterOutput"></span></p>
+    </div>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 177, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let cnt = contours.get(20);
+// You can try more different parameters
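+// arcLength(curve, closed): true measures the contour as a closed curve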
+let perimeter = cv.arcLength(cnt, true);
+perimeterOutput.innerHTML = perimeter;
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete(); cnt.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contour_properties_transpose.html b/doc/js_tutorials/js_assets/js_contour_properties_transpose.html
new file mode 100644 (file)
index 0000000..6aefa01
--- /dev/null
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Transpose Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Transpose Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 120, 200, cv.THRESH_BINARY);
+cv.transpose(src, dst);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contours_begin_contours.html b/doc/js_tutorials/js_assets/js_contours_begin_contours.html
new file mode 100644 (file)
index 0000000..8efd329
--- /dev/null
@@ -0,0 +1,77 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Contours Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Contours Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 120, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+// You can try more different parameters
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+// draw each contour with a random color
+for (let i = 0; i < contours.size(); ++i) {
+    let color = new cv.Scalar(Math.round(Math.random() * 255), Math.round(Math.random() * 255),
+                              Math.round(Math.random() * 255));
+    cv.drawContours(dst, contours, i, color, 1, cv.LINE_8, hierarchy, 100);
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contours_more_functions_convexityDefects.html b/doc/js_tutorials/js_assets/js_contours_more_functions_convexityDefects.html
new file mode 100644 (file)
index 0000000..62861f7
--- /dev/null
@@ -0,0 +1,87 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Convexity Defects Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Convexity Defects Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 100, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let hull = new cv.Mat();
+let defect = new cv.Mat();
+let cnt = contours.get(0);
+let lineColor = new cv.Scalar(255, 0, 0);
+let circleColor = new cv.Scalar(255, 255, 255);
+cv.convexHull(cnt, hull, false, false);
+cv.convexityDefects(cnt, hull, defect);
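+// convexityDefects needs a hull of point indices (returnPoints = false above);
+// each defect row holds 4 int32 values: the start, end and farthest-point
+// indices into cnt, plus a fixed-point depth (divide by 256 to get pixels)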
+for (let i = 0; i < defect.rows; ++i) {
+    let start = new cv.Point(cnt.data32S[defect.data32S[i * 4] * 2],
+                             cnt.data32S[defect.data32S[i * 4] * 2 + 1]);
+    let end = new cv.Point(cnt.data32S[defect.data32S[i * 4 + 1] * 2],
+                           cnt.data32S[defect.data32S[i * 4 + 1] * 2 + 1]);
+    let far = new cv.Point(cnt.data32S[defect.data32S[i * 4 + 2] * 2],
+                           cnt.data32S[defect.data32S[i * 4 + 2] * 2 + 1]);
+    cv.line(dst, start, end, lineColor, 2, cv.LINE_AA, 0);
+    cv.circle(dst, far, 3, circleColor, -1);
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); hierarchy.delete(); contours.delete(); hull.delete(); defect.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_contours_more_functions_shape.html b/doc/js_tutorials/js_assets/js_contours_more_functions_shape.html
new file mode 100644 (file)
index 0000000..6b899ed
--- /dev/null
@@ -0,0 +1,82 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Match Shape Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Match Shape Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<div>
+    <p><strong>The result is: </strong><span id="matchShapesOutput"></span></p>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 177, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let contourID0 = 10;
+let contourID1 = 5;
+let color0 = new cv.Scalar(255, 0, 0);
+let color1 = new cv.Scalar(0, 0, 255);
+// You can try more different parameters
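+// matchShapes(contour1, contour2, method, parameter): method 1 compares
+// Hu moment invariants (CONTOURS_MATCH_I1); the last parameter is unused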
+let result = cv.matchShapes(contours.get(contourID0), contours.get(contourID1), 1, 0);
+matchShapesOutput.innerHTML = result;
+cv.drawContours(dst, contours, contourID0, color0, 1, cv.LINE_8, hierarchy, 100);
+cv.drawContours(dst, contours, contourID1, color1, 1, cv.LINE_8, hierarchy, 100);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('coins.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_example_style.css b/doc/js_tutorials/js_assets/js_example_style.css
new file mode 100644 (file)
index 0000000..7c0cb72
--- /dev/null
@@ -0,0 +1,70 @@
+body, div, p {
+    font: 400 14px/22px Roboto,sans-serif;
+}
+canvas, img, video {
+    border: 1px solid black;
+}
+td {
+    padding: 10px 0px 0px 10px;
+    text-align: center;
+}
+button {
+    display: inline-block;
+    color: #fff;
+    background-color: #337ab7;
+    border-color: #2e6da4;
+    padding: 6px 12px;
+    margin-bottom: 0;
+    font-size: 14px;
+    font-weight: bold;
+    text-align: center;
+    white-space: nowrap;
+    vertical-align: middle;
+    -ms-touch-action: manipulation;
+    touch-action: manipulation;
+    cursor: pointer;
+    -webkit-user-select: none;
+    -moz-user-select: none;
+    -ms-user-select: none;
+    user-select: none;
+    background-image: none;
+    border: 1px solid transparent;
+    border-radius: 4px;
+}
+button[disabled] {
+    cursor: not-allowed;
+    filter: alpha(opacity=65);
+    -webkit-box-shadow: none;
+    box-shadow: none;
+    opacity: .65;
+}
+.control {
+    margin-bottom: 3px;
+}
+.err {
+    color: red;
+    font-weight: bold;
+}
+.caption {
+    margin: 0;
+    font-weight: bold;
+}
+.code {
+    padding: 4px 6px 0px 6px;
+    margin: 0px;
+    background-color: #FBFCFD;
+    border: 1px solid #C4CFE5;
+    font-family: monospace;
+    font-size: 13px;
+    min-height: 13px;
+    line-height: 1.0;
+    text-wrap: unrestricted;
+}
+.hidden {
+    display: none;
+}
+.small {
+    max-width: 300px;
+}
diff --git a/doc/js_tutorials/js_assets/js_face_detection.html b/doc/js_tutorials/js_assets/js_face_detection.html
new file mode 100644 (file)
index 0000000..6a047ed
--- /dev/null
@@ -0,0 +1,100 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Face Detection Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Face Detection Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let gray = new cv.Mat();
+cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY, 0);
+let faces = new cv.RectVector();
+let eyes = new cv.RectVector();
+let faceCascade = new cv.CascadeClassifier();
+let eyeCascade = new cv.CascadeClassifier();
+// load pre-trained classifiers
+faceCascade.load('haarcascade_frontalface_default.xml');
+eyeCascade.load('haarcascade_eye.xml');
+// detect faces
+let msize = new cv.Size(0, 0);
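+// detectMultiScale(image, objects, scaleFactor, minNeighbors, flags, minSize, maxSize);
+// zero-sized min/max place no constraint on the detected object size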
+faceCascade.detectMultiScale(gray, faces, 1.1, 3, 0, msize, msize);
+for (let i = 0; i < faces.size(); ++i) {
+    let roiGray = gray.roi(faces.get(i));
+    let roiSrc = src.roi(faces.get(i));
+    let point1 = new cv.Point(faces.get(i).x, faces.get(i).y);
+    let point2 = new cv.Point(faces.get(i).x + faces.get(i).width,
+                              faces.get(i).y + faces.get(i).height);
+    cv.rectangle(src, point1, point2, [255, 0, 0, 255]);
+    // detect eyes in face ROI
+    eyeCascade.detectMultiScale(roiGray, eyes);
+    for (let j = 0; j < eyes.size(); ++j) {
+        let point1 = new cv.Point(eyes.get(j).x, eyes.get(j).y);
+        let point2 = new cv.Point(eyes.get(j).x + eyes.get(j).width,
+                                  eyes.get(j).y + eyes.get(j).height);
+        cv.rectangle(roiSrc, point1, point2, [0, 0, 255, 255]);
+    }
+    roiGray.delete(); roiSrc.delete();
+}
+cv.imshow('canvasOutput', src);
+src.delete(); gray.delete(); faceCascade.delete();
+eyeCascade.delete(); faces.delete(); eyes.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    let eyeCascadeFile = 'haarcascade_eye.xml';
+    utils.createFileFromUrl(eyeCascadeFile, eyeCascadeFile, () => {
+        let faceCascadeFile = 'haarcascade_frontalface_default.xml';
+        utils.createFileFromUrl(faceCascadeFile, faceCascadeFile, () => {
+            tryIt.removeAttribute('disabled');
+        });
+    });
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_face_detection_camera.html b/doc/js_tutorials/js_assets/js_face_detection_camera.html
new file mode 100644 (file)
index 0000000..f839cd7
--- /dev/null
@@ -0,0 +1,142 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Face Detection Camera Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Face Detection Camera Example</h2>
+<p>
+    Click the <b>Start/Stop</b> button to start or stop the camera capture.<br>
+    The <b>videoInput</b> &lt;video&gt; element is the face detector input,
+    and the <b>canvasOutput</b> &lt;canvas&gt; element is its output.<br>
+    The code in the &lt;textarea&gt; is executed when the video starts.
+    You can edit it to explore further.
+</p>
+<div>
+<div class="control"><button id="startAndStop" disabled>Start</button></div>
+<textarea class="code" rows="29" cols="80" id="codeEditor" spellcheck="false">
+</textarea>
+</div>
+<p class="err" id="errorMessage"></p>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <video id="videoInput" width=320 height=240></video>
+        </td>
+        <td>
+            <canvas id="canvasOutput" width=320 height=240></canvas>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">videoInput</div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    </table>
+</div>
+<script src="https://webrtc.github.io/adapter/adapter-5.0.4.js" type="text/javascript"></script>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let video = document.getElementById('videoInput');
+let src = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+let dst = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+let gray = new cv.Mat();
+let cap = new cv.VideoCapture(video);
+let faces = new cv.RectVector();
+let classifier = new cv.CascadeClassifier();
+
+// load pre-trained classifiers
+classifier.load('haarcascade_frontalface_default.xml');
+
+const FPS = 30;
+function processVideo() {
+    try {
+        if (!streaming) {
+            // clean and stop.
+            src.delete();
+            dst.delete();
+            gray.delete();
+            faces.delete();
+            classifier.delete();
+            return;
+        }
+        let begin = Date.now();
+        // start processing.
+        cap.read(src);
+        src.copyTo(dst);
+        cv.cvtColor(dst, gray, cv.COLOR_RGBA2GRAY, 0);
+        // detect faces.
+        classifier.detectMultiScale(gray, faces, 1.1, 3, 0);
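+        // 1.1 = scale factor, 3 = min neighbors; 0 is the legacy flags argument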
+        // draw faces.
+        for (let i = 0; i < faces.size(); ++i) {
+            let face = faces.get(i);
+            let point1 = new cv.Point(face.x, face.y);
+            let point2 = new cv.Point(face.x + face.width, face.y + face.height);
+            cv.rectangle(dst, point1, point2, [255, 0, 0, 255]);
+        }
+        cv.imshow('canvasOutput', dst);
+        // schedule the next one.
+        let delay = 1000/FPS - (Date.now() - begin);
+        setTimeout(processVideo, delay);
+    } catch (err) {
+        utils.printError(err);
+    }
+}
+
+// schedule the first one.
+setTimeout(processVideo, 0);
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+
+let streaming = false;
+let videoInput = document.getElementById('videoInput');
+let startAndStop = document.getElementById('startAndStop');
+let canvasOutput = document.getElementById('canvasOutput');
+let canvasContext = canvasOutput.getContext('2d');
+
+startAndStop.addEventListener('click', () => {
+    if (!streaming) {
+        utils.clearError();
+        utils.startCamera('qvga', onVideoStarted, 'videoInput');
+    } else {
+        utils.stopCamera();
+        onVideoStopped();
+    }
+});
+
+function onVideoStarted() {
+    streaming = true;
+    startAndStop.innerText = 'Stop';
+    videoInput.width = videoInput.videoWidth;
+    videoInput.height = videoInput.videoHeight;
+    utils.executeCode('codeEditor');
+}
+
+function onVideoStopped() {
+    streaming = false;
+    canvasContext.clearRect(0, 0, canvasOutput.width, canvasOutput.height);
+    startAndStop.innerText = 'Start';
+}
+
+utils.loadOpenCv(() => {
+    let faceCascadeFile = 'haarcascade_frontalface_default.xml';
+    utils.createFileFromUrl(faceCascadeFile, faceCascadeFile, () => {
+        startAndStop.removeAttribute('disabled');
+    });
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_filtering_GaussianBlur.html b/doc/js_tutorials/js_assets/js_filtering_GaussianBlur.html
new file mode 100644 (file)
index 0000000..1fc2033
--- /dev/null
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Gaussian Blur Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Gaussian Blur Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can edit the code in the &lt;textarea&gt; to explore further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let ksize = new cv.Size(3, 3);
+// You can try more different parameters
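+// sigmaX and sigmaY of 0 let OpenCV derive them from the kernel size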
+cv.GaussianBlur(src, dst, ksize, 0, 0, cv.BORDER_DEFAULT);
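+// A larger kernel with explicit sigmas (assumed values) blurs more strongly:
+// cv.GaussianBlur(src, dst, new cv.Size(7, 7), 2, 2, cv.BORDER_DEFAULT);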
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_filtering_bilateralFilter.html b/doc/js_tutorials/js_assets/js_filtering_bilateralFilter.html
new file mode 100644 (file)
index 0000000..e67b967
--- /dev/null
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Bilateral Filter Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Bilateral Filter Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB, 0);
+// You can try different parameters
+cv.bilateralFilter(src, dst, 9, 75, 75, cv.BORDER_DEFAULT);
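+// Larger sigma values (assumed example) smooth more while preserving edges:
+// cv.bilateralFilter(src, dst, 9, 150, 150, cv.BORDER_DEFAULT);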
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_filtering_blur.html b/doc/js_tutorials/js_assets/js_filtering_blur.html
new file mode 100644 (file)
index 0000000..9fc8528
--- /dev/null
@@ -0,0 +1,70 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Blur Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Blur Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let ksize = new cv.Size(3, 3);
+let anchor = new cv.Point(-1, -1);
+// You can try different parameters
+cv.blur(src, dst, ksize, anchor, cv.BORDER_DEFAULT);
+// cv.boxFilter(src, dst, -1, ksize, anchor, true, cv.BORDER_DEFAULT);
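+// (cv.blur is cv.boxFilter with ddepth = -1 and normalize = true)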
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_filtering_filter.html b/doc/js_tutorials/js_assets/js_filtering_filter.html
new file mode 100644 (file)
index 0000000..df0e1fa
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Filter Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Filter Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let M = cv.Mat.eye(3, 3, cv.CV_32FC1);
+let anchor = new cv.Point(-1, -1);
+// You can try different parameters
+cv.filter2D(src, dst, cv.CV_8U, M, anchor, 0, cv.BORDER_DEFAULT);
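+// A hedged alternative kernel (assumed values): a normalized 3x3 mean
+// filter, which makes filter2D behave like a box blur:
+// let M = cv.matFromArray(3, 3, cv.CV_32FC1,
+//                         [1/9, 1/9, 1/9, 1/9, 1/9, 1/9, 1/9, 1/9, 1/9]);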
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_filtering_medianBlur.html b/doc/js_tutorials/js_assets/js_filtering_medianBlur.html
new file mode 100644 (file)
index 0000000..093b52e
--- /dev/null
@@ -0,0 +1,67 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Median Blur Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Median Blur Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// You can try different parameters
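+// (note: ksize must be an odd integer greater than 1, e.g. 3, 5, 7)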
+cv.medianBlur(src, dst, 5);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_fourier_transform_dft.html b/doc/js_tutorials/js_assets/js_fourier_transform_dft.html
new file mode 100644 (file)
index 0000000..df3f73a
--- /dev/null
@@ -0,0 +1,128 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image DFT Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image DFT Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+
+// get the optimal size for DFT
+let optimalRows = cv.getOptimalDFTSize(src.rows);
+let optimalCols = cv.getOptimalDFTSize(src.cols);
+let s0 = cv.Scalar.all(0);
+let padded = new cv.Mat();
+cv.copyMakeBorder(src, padded, 0, optimalRows - src.rows, 0,
+                  optimalCols - src.cols, cv.BORDER_CONSTANT, s0);
+
+// use a cv.MatVector to hold the real and imaginary planes
+let plane0 = new cv.Mat();
+padded.convertTo(plane0, cv.CV_32F);
+let planes = new cv.MatVector();
+let complexI = new cv.Mat();
+let plane1 = cv.Mat.zeros(padded.rows, padded.cols, cv.CV_32F);
+planes.push_back(plane0);
+planes.push_back(plane1);
+cv.merge(planes, complexI);
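+// complexI is now a two-channel (real, imaginary) floating-point matrix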
+
+// in-place DFT transform
+cv.dft(complexI, complexI);
+
+// compute log(1 + sqrt(Re(DFT(img))**2 + Im(DFT(img))**2))
+cv.split(complexI, planes);
+cv.magnitude(planes.get(0), planes.get(1), planes.get(0));
+let mag = planes.get(0);
+let m1 = cv.Mat.ones(mag.rows, mag.cols, mag.type());
+cv.add(mag, m1, mag);
+cv.log(mag, mag);
+
+// crop the spectrum, if it has an odd number of rows or columns
+let rect = new cv.Rect(0, 0, mag.cols & -2, mag.rows & -2);
+mag = mag.roi(rect);
+
+// rearrange the quadrants of Fourier image
+// so that the origin is at the image center
+let cx = mag.cols / 2;
+let cy = mag.rows / 2;
+let tmp = new cv.Mat();
+
+let rect0 = new cv.Rect(0, 0, cx, cy);
+let rect1 = new cv.Rect(cx, 0, cx, cy);
+let rect2 = new cv.Rect(0, cy, cx, cy);
+let rect3 = new cv.Rect(cx, cy, cx, cy);
+
+let q0 = mag.roi(rect0);
+let q1 = mag.roi(rect1);
+let q2 = mag.roi(rect2);
+let q3 = mag.roi(rect3);
+
+// exchange quadrants 1 and 4 (top-left with bottom-right)
+q0.copyTo(tmp);
+q3.copyTo(q0);
+tmp.copyTo(q3);
+
+// exchange quadrants 2 and 3 (top-right with bottom-left)
+q1.copyTo(tmp);
+q2.copyTo(q1);
+tmp.copyTo(q2);
+
+// The pixel values of a cv.CV_32F image are expected to range from 0 to 1.
+cv.normalize(mag, mag, 0, 1, cv.NORM_MINMAX);
+
+cv.imshow('canvasOutput', mag);
+src.delete(); padded.delete(); planes.delete(); complexI.delete(); m1.delete(); tmp.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_geometric_transformations_getAffineTransform.html b/doc/js_tutorials/js_assets/js_geometric_transformations_getAffineTransform.html
new file mode 100644 (file)
index 0000000..48aa2b2
--- /dev/null
@@ -0,0 +1,74 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Get Affine Transform Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Get Affine Transform Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// (data32F[0], data32F[1]) is the first point
+// (data32F[2], data32F[3]) is the second point
+// (data32F[4], data32F[5]) is the third point
+let srcTri = cv.matFromArray(3, 1, cv.CV_32FC2, [0, 0, 0, 1, 1, 0]);
+let dstTri = cv.matFromArray(3, 1, cv.CV_32FC2, [0.6, 0.2, 0.1, 1.3, 1.5, 0.3]);
+let dsize = new cv.Size(src.cols, src.rows); // cv.Size is (width, height)
+let M = cv.getAffineTransform(srcTri, dstTri);
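+// (an affine map is determined exactly by three point correspondences)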
+// You can try different parameters
+cv.warpAffine(src, dst, M, dsize, cv.INTER_LINEAR, cv.BORDER_CONSTANT, new cv.Scalar());
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete(); srcTri.delete(); dstTri.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_geometric_transformations_resize.html b/doc/js_tutorials/js_assets/js_geometric_transformations_resize.html
new file mode 100644 (file)
index 0000000..5100b94
--- /dev/null
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Resize Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Resize Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let dsize = new cv.Size(300, 300);
+// You can try different parameters
+cv.resize(src, dst, dsize, 0, 0, cv.INTER_AREA);
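+// A hedged alternative: scale by factors relative to the source size
+// cv.resize(src, dst, new cv.Size(0, 0), 0.5, 0.5, cv.INTER_AREA);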
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_geometric_transformations_rotateWarpAffine.html b/doc/js_tutorials/js_assets/js_geometric_transformations_rotateWarpAffine.html
new file mode 100644 (file)
index 0000000..798d1ce
--- /dev/null
@@ -0,0 +1,70 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Rotate Transform Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Rotate Transform Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let dsize = new cv.Size(src.cols, src.rows);
+let center = new cv.Point(src.cols / 2, src.rows / 2);
+// You can try different parameters
+let M = cv.getRotationMatrix2D(center, 45, 1);
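+// (the angle is in degrees, counter-clockwise; the third argument is an
+// isotropic scale factor)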
+cv.warpAffine(src, dst, M, dsize, cv.INTER_LINEAR, cv.BORDER_CONSTANT, new cv.Scalar());
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_geometric_transformations_warpAffine.html b/doc/js_tutorials/js_assets/js_geometric_transformations_warpAffine.html
new file mode 100644 (file)
index 0000000..a7e54d6
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Affine Transform Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Affine Transform Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let M = cv.matFromArray(2, 3, cv.CV_64FC1, [1, 0, 50, 0, 1, 100]);
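+// M = [[1, 0, tx], [0, 1, ty]] translates by tx = 50 (right), ty = 100 (down)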
+let dsize = new cv.Size(src.cols, src.rows);
+// You can try different parameters
+cv.warpAffine(src, dst, M, dsize, cv.INTER_LINEAR, cv.BORDER_CONSTANT, new cv.Scalar());
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_geometric_transformations_warpPerspective.html b/doc/js_tutorials/js_assets/js_geometric_transformations_warpPerspective.html
new file mode 100644 (file)
index 0000000..5608b91
--- /dev/null
@@ -0,0 +1,75 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Perspective Transform Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Perspective Transform Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let dsize = new cv.Size(src.cols, src.rows);
+// (data32F[0], data32F[1]) is the first point
+// (data32F[2], data32F[3]) is the second point
+// (data32F[4], data32F[5]) is the third point
+// (data32F[6], data32F[7]) is the fourth point
+let srcTri = cv.matFromArray(4, 1, cv.CV_32FC2, [56, 65, 368, 52, 28, 387, 389, 390]);
+let dstTri = cv.matFromArray(4, 1, cv.CV_32FC2, [0, 0, 300, 0, 0, 300, 300, 300]);
+let M = cv.getPerspectiveTransform(srcTri, dstTri);
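+// (a perspective map is determined by four point correspondences)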
+// You can try different parameters
+cv.warpPerspective(src, dst, M, dsize, cv.INTER_LINEAR, cv.BORDER_CONSTANT, new cv.Scalar());
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete(); srcTri.delete(); dstTri.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_grabcut_grabCut.html b/doc/js_tutorials/js_assets/js_grabcut_grabCut.html
new file mode 100644 (file)
index 0000000..c8347f9
--- /dev/null
@@ -0,0 +1,85 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image GrabCut Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image GrabCut Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB, 0);
+let mask = new cv.Mat();
+let bgdModel = new cv.Mat();
+let fgdModel = new cv.Mat();
+let rect = new cv.Rect(50, 50, 260, 280);
+cv.grabCut(src, mask, rect, bgdModel, fgdModel, 1, cv.GC_INIT_WITH_RECT);
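+// mask values: 0/2 mark (probable) background, 1/3 (probable) foreground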
+// draw foreground
+for (let i = 0; i < src.rows; i++) {
+    for (let j = 0; j < src.cols; j++) {
+        if (mask.ucharPtr(i, j)[0] == 0 || mask.ucharPtr(i, j)[0] == 2) {
+            src.ucharPtr(i, j)[0] = 0;
+            src.ucharPtr(i, j)[1] = 0;
+            src.ucharPtr(i, j)[2] = 0;
+        }
+    }
+}
+// draw grab rect
+let color = new cv.Scalar(0, 0, 255);
+let point1 = new cv.Point(rect.x, rect.y);
+let point2 = new cv.Point(rect.x + rect.width, rect.y + rect.height);
+cv.rectangle(src, point1, point2, color);
+cv.imshow('canvasOutput', src);
+src.delete(); mask.delete(); bgdModel.delete(); fgdModel.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_gradients_Laplacian.html b/doc/js_tutorials/js_assets/js_gradients_Laplacian.html
new file mode 100644 (file)
index 0000000..0ada3a3
--- /dev/null
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Laplacian Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Laplacian Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGB2GRAY, 0);
+// You can try different parameters
+cv.Laplacian(src, dst, cv.CV_8U, 1, 1, 0, cv.BORDER_DEFAULT);
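+// Note: with cv.CV_8U, negative Laplacian responses are clipped to 0.
+// A hedged alternative keeps them in a wider depth, then takes |values|:
+// cv.Laplacian(src, dst, cv.CV_16S, 1, 1, 0, cv.BORDER_DEFAULT);
+// cv.convertScaleAbs(dst, dst, 1, 0);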
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_gradients_Sobel.html b/doc/js_tutorials/js_assets/js_gradients_Sobel.html
new file mode 100644 (file)
index 0000000..676eb86
--- /dev/null
@@ -0,0 +1,79 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Sobel Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Sobel Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b>, <b>canvasOutputx</b> and <b>canvasOutputy</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput" class="small"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutputx" class="small"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutputy" class="small"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutputx</div>
+        </td>
+        <td>
+            <div class="caption">canvasOutputy</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dstx = new cv.Mat();
+let dsty = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGB2GRAY, 0);
+// You can try different parameters
+cv.Sobel(src, dstx, cv.CV_8U, 1, 0, 3, 1, 0, cv.BORDER_DEFAULT);
+cv.Sobel(src, dsty, cv.CV_8U, 0, 1, 3, 1, 0, cv.BORDER_DEFAULT);
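+// dx=1, dy=0 differentiates along x; dx=0, dy=1 along y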
+// cv.Scharr(src, dstx, cv.CV_8U, 1, 0, 1, 0, cv.BORDER_DEFAULT);
+// cv.Scharr(src, dsty, cv.CV_8U, 0, 1, 1, 0, cv.BORDER_DEFAULT);
+cv.imshow('canvasOutputx', dstx);
+cv.imshow('canvasOutputy', dsty);
+src.delete(); dstx.delete(); dsty.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_gradients_absSobel.html b/doc/js_tutorials/js_assets/js_gradients_absSobel.html
new file mode 100644 (file)
index 0000000..3b304f7
--- /dev/null
@@ -0,0 +1,78 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image AbsSobel Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image AbsSobel Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b>, <b>canvasOutput8U</b> and <b>canvasOutput64F</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput" class="small"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput8U" class="small"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput64F" class="small"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput8U</div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput64F</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dstx = new cv.Mat();
+let absDstx = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGB2GRAY, 0);
+// You can try different parameters
+cv.Sobel(src, dstx, cv.CV_8U, 1, 0, 3, 1, 0, cv.BORDER_DEFAULT);
+cv.Sobel(src, absDstx, cv.CV_64F, 1, 0, 3, 1, 0, cv.BORDER_DEFAULT);
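+// CV_8U clips negative gradients to 0, losing edges on one side;
+// the CV_64F result keeps them, and convertScaleAbs maps |values| to 8-bit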
+cv.convertScaleAbs(absDstx, absDstx, 1, 0);
+cv.imshow('canvasOutput8U', dstx);
+cv.imshow('canvasOutput64F', absDstx);
+src.delete(); dstx.delete(); absDstx.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_histogram_backprojection_calcBackProject.html b/doc/js_tutorials/js_assets/js_histogram_backprojection_calcBackProject.html
new file mode 100644 (file)
index 0000000..b3881c8
--- /dev/null
@@ -0,0 +1,90 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Back Project Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Back Project Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>srcCanvasInput</b>, <b>dstCanvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="srcCanvasInput" class="small"></canvas>
+        </td>
+        <td>
+            <canvas id="dstCanvasInput" class="small"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput" class="small"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">srcCanvasInput <input type="file" id="srcFileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">dstCanvasInput <input type="file" id="dstFileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('srcCanvasInput');
+let dst = cv.imread('dstCanvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGB2HSV, 0);
+cv.cvtColor(dst, dst, cv.COLOR_RGB2HSV, 0);
+let srcVec = new cv.MatVector();
+let dstVec = new cv.MatVector();
+srcVec.push_back(src); dstVec.push_back(dst);
+let backproj = new cv.Mat();
+let none = new cv.Mat();
+let mask = new cv.Mat();
+let hist = new cv.Mat();
+let channels = [0];
+let histSize = [50];
+let ranges = [0, 180];
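+// channel 0 is hue; 8-bit OpenCV HSV images store hue in [0, 180)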
+let accumulate = false;
+cv.calcHist(srcVec, channels, mask, hist, histSize, ranges, accumulate);
+cv.normalize(hist, hist, 0, 255, cv.NORM_MINMAX, -1, none);
+cv.calcBackProject(dstVec, channels, hist, backproj, ranges, 1);
+cv.imshow('canvasOutput', backproj);
+src.delete(); dst.delete(); srcVec.delete(); dstVec.delete();
+backproj.delete(); mask.delete(); hist.delete(); none.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('handSrc.jpg', 'srcCanvasInput');
+utils.loadImageToCanvas('handDst.jpg', 'dstCanvasInput');
+utils.addFileInputHandler('srcFileInput', 'srcCanvasInput');
+utils.addFileInputHandler('dstFileInput', 'dstCanvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_histogram_begins_calcHist.html b/doc/js_tutorials/js_assets/js_histogram_begins_calcHist.html
new file mode 100644 (file)
index 0000000..49b0495
--- /dev/null
@@ -0,0 +1,88 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Histogram Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Histogram Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+let srcVec = new cv.MatVector();
+srcVec.push_back(src);
+let accumulate = false;
+let channels = [0];
+let histSize = [256];
+let ranges = [0, 256]; // the upper boundary is exclusive
+let hist = new cv.Mat();
+let mask = new cv.Mat();
+let color = new cv.Scalar(255, 255, 255);
+let scale = 2;
+// You can try different parameters
+cv.calcHist(srcVec, channels, mask, hist, histSize, ranges, accumulate);
+let result = cv.minMaxLoc(hist, mask);
+let max = result.maxVal;
+let dst = cv.Mat.zeros(src.rows, histSize[0] * scale, cv.CV_8UC3);
+// draw histogram
+for (let i = 0; i < histSize[0]; i++) {
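+    // scale each bin so the tallest bar spans the output image height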
+    let binVal = hist.data32F[i] * src.rows / max;
+    let point1 = new cv.Point(i * scale, src.rows - 1);
+    let point2 = new cv.Point((i + 1) * scale - 1, src.rows - binVal);
+    cv.rectangle(dst, point1, point2, color, cv.FILLED);
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); srcVec.delete(); mask.delete(); hist.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_histogram_equalization_createCLAHE.html b/doc/js_tutorials/js_assets/js_histogram_equalization_createCLAHE.html
new file mode 100644 (file)
index 0000000..cd70b19
--- /dev/null
@@ -0,0 +1,73 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image CLAHE Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image CLAHE Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let equalDst = new cv.Mat();
+let claheDst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.equalizeHist(src, equalDst);
+let tileGridSize = new cv.Size(8, 8);
+// You can try different parameters
+let clahe = new cv.CLAHE(40, tileGridSize);
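+// clipLimit (40 here) caps each tile's histogram to limit contrast
+// amplification; typical values are much smaller, e.g. 2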
+clahe.apply(src, claheDst);
+// cv.imshow('canvasOutput', equalDst); // plain equalization (would be overwritten below)
+cv.imshow('canvasOutput', claheDst);
+src.delete(); equalDst.delete(); claheDst.delete(); clahe.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_histogram_equalization_equalizeHist.html b/doc/js_tutorials/js_assets/js_histogram_equalization_equalizeHist.html
new file mode 100644 (file)
index 0000000..873d2f4
--- /dev/null
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Equalize Histogram Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Equalize Histogram Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.equalizeHist(src, dst);
+// cv.imshow('canvasOutput', src); // grayscale input (would be overwritten below)
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_houghcircles_HoughCirclesP.html b/doc/js_tutorials/js_assets/js_houghcircles_HoughCirclesP.html
new file mode 100644 (file)
index 0000000..f707adb
--- /dev/null
@@ -0,0 +1,79 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Hough Circles Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Hough Circles Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8U);
+let circles = new cv.Mat();
+let color = new cv.Scalar(255, 0, 0);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+// You can try different parameters
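+// (dp=1, minDist=45, param1=75 is the Canny high threshold,
+// param2=40 is the accumulator threshold, min/maxRadius=0)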
+cv.HoughCircles(src, circles, cv.HOUGH_GRADIENT,
+                1, 45, 75, 40, 0, 0);
+// draw circles
+for (let i = 0; i < circles.cols; ++i) {
+    let x = circles.data32F[i * 3];
+    let y = circles.data32F[i * 3 + 1];
+    let radius = circles.data32F[i * 3 + 2];
+    let center = new cv.Point(x, y);
+    cv.circle(dst, center, radius, color);
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); circles.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('coins.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_houghlines_HoughLines.html b/doc/js_tutorials/js_assets/js_houghlines_HoughLines.html
new file mode 100644 (file)
index 0000000..2edbe39
--- /dev/null
@@ -0,0 +1,83 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Hough Lines Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Hough Lines Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8U);
+let lines = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.Canny(src, src, 50, 200, 3);
+// You can try different parameters
+cv.HoughLines(src, lines, 1, Math.PI / 180,
+              30, 0, 0, 0, Math.PI);
+// draw lines
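+// each line is (rho, theta); (x0, y0) = rho * (cos(theta), sin(theta))
+// is the point closest to the origin, extended 1000 px in both directions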
+for (let i = 0; i < lines.rows; ++i) {
+    let rho = lines.data32F[i * 2];
+    let theta = lines.data32F[i * 2 + 1];
+    let a = Math.cos(theta);
+    let b = Math.sin(theta);
+    let x0 = a * rho;
+    let y0 = b * rho;
+    let startPoint = {x: x0 - 1000 * b, y: y0 + 1000 * a};
+    let endPoint = {x: x0 + 1000 * b, y: y0 - 1000 * a};
+    cv.line(dst, startPoint, endPoint, [255, 0, 0, 255]);
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); lines.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_houghlines_HoughLinesP.html b/doc/js_tutorials/js_assets/js_houghlines_HoughLinesP.html
new file mode 100644 (file)
index 0000000..e69bf54
--- /dev/null
@@ -0,0 +1,77 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image HoughLinesP Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image HoughLinesP Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8U);
+let lines = new cv.Mat();
+let color = new cv.Scalar(255, 0, 0);
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.Canny(src, src, 50, 200, 3);
+// You can try different parameters
+cv.HoughLinesP(src, lines, 1, Math.PI / 180, 2, 0, 0);
+// draw lines
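+// each row of lines stores the segment endpoints (x1, y1, x2, y2)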
+for (let i = 0; i < lines.rows; ++i) {
+    let startPoint = new cv.Point(lines.data32S[i * 4], lines.data32S[i * 4 + 1]);
+    let endPoint = new cv.Point(lines.data32S[i * 4 + 2], lines.data32S[i * 4 + 3]);
+    cv.line(dst, startPoint, endPoint, color);
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); lines.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_image_arithmetics_bitwise.html b/doc/js_tutorials/js_assets/js_image_arithmetics_bitwise.html
new file mode 100644 (file)
index 0000000..05c0ffd
--- /dev/null
@@ -0,0 +1,114 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Bitwise Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Bitwise Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>imageCanvasInput</b>, <b>logoCanvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="imageCanvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">imageCanvasInput <input type="file" id="imageFileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <canvas id="logoCanvasInput"></canvas>
+        </td>
+        <td>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">logoCanvasInput <input type="file" id="logoFileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('imageCanvasInput');
+let logo = cv.imread('logoCanvasInput');
+let mask = new cv.Mat();
+let maskInv = new cv.Mat();
+let imgBg = new cv.Mat();
+let imgFg = new cv.Mat();
+let sum = new cv.Mat();
+let rect = new cv.Rect(0, 0, logo.cols, logo.rows);
+
+// We want to put the logo in the top-left corner, so we create a ROI there
+let roi = src.roi(rect);
+
+// Create a mask of the logo and its inverse mask
+cv.cvtColor(logo, mask, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(mask, mask, 100, 255, cv.THRESH_BINARY);
+cv.bitwise_not(mask, maskInv);
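+// `mask` is now white where the logo is bright; `maskInv` is its complement
+// and selects the background instead.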
+
+// Black out the logo area in the ROI
+cv.bitwise_and(roi, roi, imgBg, maskInv);
+
+// Take only the logo region from the logo image
+cv.bitwise_and(logo, logo, imgFg, mask);
+
+// Put the logo in the ROI and modify the main image
+cv.add(imgBg, imgFg, sum);
+
+let dst = src.clone();
+for (let i = 0; i < logo.rows; i++) {
+    for (let j = 0; j < logo.cols; j++) {
+        // copy all four RGBA channels of the blended ROI into the output
+        for (let c = 0; c < 4; c++) {
+            dst.ucharPtr(i, j)[c] = sum.ucharPtr(i, j)[c];
+        }
+    }
+}
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); logo.delete(); roi.delete(); mask.delete();
+maskInv.delete(); imgBg.delete(); imgFg.delete(); sum.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'imageCanvasInput');
+utils.loadImageToCanvas('lenaFace.png', 'logoCanvasInput');
+utils.addFileInputHandler('imageFileInput', 'imageCanvasInput');
+utils.addFileInputHandler('logoFileInput', 'logoCanvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_image_display.html b/doc/js_tutorials/js_assets/js_image_display.html
new file mode 100644 (file)
index 0000000..0e4ff18
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Read and Show Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Read and Show Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="8" cols="80" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
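+// cv.imread reads the canvas contents as an RGBA (CV_8UC4) matrix.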
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// To distinguish the input from the output, we convert the image to grayscale.
+// You can try different conversions.
+cv.cvtColor(src, dst, cv.COLOR_RGBA2GRAY);
+cv.imshow('canvasOutput', dst);
+src.delete();
+dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_imgproc_camera.html b/doc/js_tutorials/js_assets/js_imgproc_camera.html
new file mode 100644 (file)
index 0000000..2df68d7
--- /dev/null
@@ -0,0 +1,700 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Processing Video Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+<style type="text/css">
+.dg {
+    text-align: left;
+}
+.dg .property-name {
+    font: 11px Lucida Grande,sans-serif;
+    line-height: 27px;
+}
+.dg.main .close-button {
+    font: 11px Lucida Grande,sans-serif;
+    line-height: 27px;
+}
+.cell-top {
+    vertical-align: top;
+}
+</style>
+</head>
+<body>
+<h2>Image Processing Video Example</h2>
+<p>
+    Open the controls and try different image processing filters.
+</p>
+<p class="err" id="errorMessage"></p>
+<div id="container">
+    <table>
+        <tr>
+            <td></td>
+            <td>
+                <div>
+                    <span>Current Filter: </span><span id="filterName">Pass Through</span>
+                </div>
+            </td>
+            <td>
+                <div>Select Filter:</div>
+            </td>
+            <td></td>
+        </tr>
+        <tr>
+            <td></td>
+            <td class="cell-top">
+                <canvas id="canvasOutput" width="640" height="480"></canvas>
+            </td>
+            <td class="cell-top">
+                <div id="guiContainer"></div>
+            </td>
+            <td></td>
+        </tr>
+    </table>
+    <div>
+        <video id="videoInput" class="hidden">Your browser does not support the video tag.</video>
+    </div>
+</div>
+<script src="https://webrtc.github.io/adapter/adapter-5.0.4.js" type="text/javascript"></script>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/stats.js/r16/Stats.min.js" type="text/javascript"></script>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/dat-gui/0.6.4/dat.gui.min.js" type="text/javascript"></script>
+<script src="utils.js" type="text/javascript"></script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+let width = 0;
+let height = 0;
+
+let resolution = window.innerWidth < 960 ? 'qvga' : 'vga';
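+// 'qvga' is 320x240 and 'vga' is 640x480.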
+
+// whether we are streaming video from the camera.
+let streaming = false;
+
+let video = document.getElementById('videoInput');
+let vc = null;
+
+let container = document.getElementById('container');
+
+let lastFilter = '';
+let src = null;
+let dstC1 = null;
+let dstC3 = null;
+let dstC4 = null;
+
+function startVideoProcessing() {
+    src = new cv.Mat(height, width, cv.CV_8UC4);
+    dstC1 = new cv.Mat(height, width, cv.CV_8UC1);
+    dstC3 = new cv.Mat(height, width, cv.CV_8UC3);
+    dstC4 = new cv.Mat(height, width, cv.CV_8UC4);
+    requestAnimationFrame(processVideo);
+}
+
+function passThrough(src) {
+    return src;
+}
+
+function gray(src) {
+    cv.cvtColor(src, dstC1, cv.COLOR_RGBA2GRAY);
+    return dstC1;
+}
+
+function hsv(src) {
+    cv.cvtColor(src, dstC3, cv.COLOR_RGBA2RGB);
+    cv.cvtColor(dstC3, dstC3, cv.COLOR_RGB2HSV);
+    return dstC3;
+}
+
+function canny(src) {
+    cv.cvtColor(src, dstC1, cv.COLOR_RGBA2GRAY);
+    cv.Canny(dstC1, dstC1, controls.cannyThreshold1, controls.cannyThreshold2,
+             controls.cannyApertureSize, controls.cannyL2Gradient);
+    return dstC1;
+}
+
+function inRange(src) {
+    let lowValue = controls.inRangeLow;
+    let lowScalar = new cv.Scalar(lowValue, lowValue, lowValue, 255);
+    let highValue = controls.inRangeHigh;
+    let highScalar = new cv.Scalar(highValue, highValue, highValue, 255);
+    let low = new cv.Mat(height, width, src.type(), lowScalar);
+    let high = new cv.Mat(height, width, src.type(), highScalar);
+    cv.inRange(src, low, high, dstC1);
+    low.delete(); high.delete();
+    return dstC1;
+}
+
+function threshold(src) {
+    cv.threshold(src, dstC4, controls.thresholdValue, 200, cv.THRESH_BINARY);
+    return dstC4;
+}
+
+function adaptiveThreshold(src) {
+    let mat = new cv.Mat(height, width, cv.CV_8U);
+    cv.cvtColor(src, mat, cv.COLOR_RGBA2GRAY);
+    cv.adaptiveThreshold(mat, dstC1, 200, cv.ADAPTIVE_THRESH_GAUSSIAN_C,
+                         cv.THRESH_BINARY, Number(controls.adaptiveBlockSize), 2);
+    mat.delete();
+    return dstC1;
+}
+
+function gaussianBlur(src) {
+    cv.GaussianBlur(src, dstC4,
+                    {width: controls.gaussianBlurSize, height: controls.gaussianBlurSize},
+                    0, 0, cv.BORDER_DEFAULT);
+    return dstC4;
+}
+
+function bilateralFilter(src) {
+    let mat = new cv.Mat(height, width, cv.CV_8UC3);
+    cv.cvtColor(src, mat, cv.COLOR_RGBA2RGB);
+    cv.bilateralFilter(mat, dstC3, controls.bilateralFilterDiameter, controls.bilateralFilterSigma,
+                       controls.bilateralFilterSigma, cv.BORDER_DEFAULT);
+    mat.delete();
+    return dstC3;
+}
+
+function medianBlur(src) {
+    cv.medianBlur(src, dstC4, controls.medianBlurSize);
+    return dstC4;
+}
+
+function sobel(src) {
+    let mat = new cv.Mat(height, width, cv.CV_8UC1);
+    cv.cvtColor(src, mat, cv.COLOR_RGB2GRAY, 0);
+    cv.Sobel(mat, dstC1, cv.CV_8U, 1, 0, controls.sobelSize, 1, 0, cv.BORDER_DEFAULT);
+    mat.delete();
+    return dstC1;
+}
+
+function scharr(src) {
+    let mat = new cv.Mat(height, width, cv.CV_8UC1);
+    cv.cvtColor(src, mat, cv.COLOR_RGB2GRAY, 0);
+    cv.Scharr(mat, dstC1, cv.CV_8U, 1, 0, 1, 0, cv.BORDER_DEFAULT);
+    mat.delete();
+    return dstC1;
+}
+
+function laplacian(src) {
+    let mat = new cv.Mat(height, width, cv.CV_8UC1);
+    cv.cvtColor(src, mat, cv.COLOR_RGB2GRAY);
+    cv.Laplacian(mat, dstC1, cv.CV_8U, controls.laplacianSize, 1, 0, cv.BORDER_DEFAULT);
+    mat.delete();
+    return dstC1;
+}
+
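+// Pre-generate a random color per contour index; contour i is always drawn
+// with contoursColor[i], so colors stay stable across frames.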
+let contoursColor = [];
+for (let i = 0; i < 10000; i++) {
+    contoursColor.push([Math.round(Math.random() * 255),
+                        Math.round(Math.random() * 255),
+                        Math.round(Math.random() * 255), 0]);
+}
+
+function contours(src) {
+    cv.cvtColor(src, dstC1, cv.COLOR_RGBA2GRAY);
+    cv.threshold(dstC1, dstC4, 120, 200, cv.THRESH_BINARY);
+    let contours = new cv.MatVector();
+    let hierarchy = new cv.Mat();
+    cv.findContours(dstC4, contours, hierarchy,
+                    Number(controls.contoursMode),
+                    Number(controls.contoursMethod), {x: 0, y: 0});
+    dstC3.delete();
+    dstC3 = cv.Mat.ones(height, width, cv.CV_8UC3);
+    for (let i = 0; i < contours.size(); ++i) {
+        let color = contoursColor[i];
+        cv.drawContours(dstC3, contours, i, color, 1, cv.LINE_8, hierarchy);
+    }
+    contours.delete(); hierarchy.delete();
+    return dstC3;
+}
+
+function calcHist(src) {
+    cv.cvtColor(src, dstC1, cv.COLOR_RGBA2GRAY);
+    let srcVec = new cv.MatVector();
+    srcVec.push_back(dstC1);
+    let scale = 2;
+    let channels = [0];
+    let histSize = [src.cols/scale];
+    const ranges = [0, 255];
+    let hist = new cv.Mat();
+    let mask = new cv.Mat();
+    let color = new cv.Scalar(0xfb, 0xca, 0x04, 0xff);
+    cv.calcHist(srcVec, channels, mask, hist, histSize, ranges);
+    let result = cv.minMaxLoc(hist, mask);
+    let max = result.maxVal;
+    cv.cvtColor(dstC1, dstC4, cv.COLOR_GRAY2RGBA);
+    // draw histogram on src
+    for (let i = 0; i < histSize[0]; i++) {
+        let binVal = hist.data32F[i] * src.rows / max;
+        cv.rectangle(dstC4, {x: i * scale, y: src.rows - 1},
+                     {x: (i + 1) * scale - 1, y: src.rows - binVal/3}, color, cv.FILLED);
+    }
+    srcVec.delete();
+    mask.delete();
+    hist.delete();
+    return dstC4;
+}
+
+function equalizeHist(src) {
+    cv.cvtColor(src, dstC1, cv.COLOR_RGBA2GRAY, 0);
+    cv.equalizeHist(dstC1, dstC1);
+    return dstC1;
+}
+
+let base;
+
+function backprojection(src) {
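+    // When this filter is first selected, capture the current frame as the
+    // model; its histogram is back-projected onto subsequent frames.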
+    if (lastFilter !== 'backprojection') {
+        if (base instanceof cv.Mat) {
+            base.delete();
+        }
+        base = src.clone();
+        cv.cvtColor(base, base, cv.COLOR_RGB2HSV, 0);
+    }
+    cv.cvtColor(src, dstC3, cv.COLOR_RGB2HSV, 0);
+    let baseVec = new cv.MatVector();
+    let targetVec = new cv.MatVector();
+    baseVec.push_back(base); targetVec.push_back(dstC3);
+    let mask = new cv.Mat();
+    let hist = new cv.Mat();
+    let channels = [0];
+    let histSize = [50];
+    let ranges;
+    if (controls.backprojectionRangeLow < controls.backprojectionRangeHigh) {
+        ranges = [controls.backprojectionRangeLow, controls.backprojectionRangeHigh];
+    } else {
+        return src;
+    }
+    cv.calcHist(baseVec, channels, mask, hist, histSize, ranges);
+    cv.normalize(hist, hist, 0, 255, cv.NORM_MINMAX);
+    cv.calcBackProject(targetVec, channels, hist, dstC1, ranges, 1);
+    baseVec.delete();
+    targetVec.delete();
+    mask.delete();
+    hist.delete();
+    return dstC1;
+}
+
+function erosion(src) {
+    let kernelSize = controls.erosionSize;
+    let kernel = cv.Mat.ones(kernelSize, kernelSize, cv.CV_8U);
+    let color = new cv.Scalar();
+    cv.erode(src, dstC4, kernel, {x: -1, y: -1}, 1, Number(controls.erosionBorderType), color);
+    kernel.delete();
+    return dstC4;
+}
+
+function dilation(src) {
+    let kernelSize = controls.dilationSize;
+    let kernel = cv.Mat.ones(kernelSize, kernelSize, cv.CV_8U);
+    let color = new cv.Scalar();
+    cv.dilate(src, dstC4, kernel, {x: -1, y: -1}, 1, Number(controls.dilationBorderType), color);
+    kernel.delete();
+    return dstC4;
+}
+
+function morphology(src) {
+    let kernelSize = controls.morphologySize;
+    let kernel = cv.getStructuringElement(Number(controls.morphologyShape),
+                                          {width: kernelSize, height: kernelSize});
+    let color = new cv.Scalar();
+    let op = Number(controls.morphologyOp);
+    let image = src;
+    if (op === cv.MORPH_GRADIENT || op === cv.MORPH_TOPHAT || op === cv.MORPH_BLACKHAT) {
+        cv.cvtColor(src, dstC3, cv.COLOR_RGBA2RGB);
+        image = dstC3;
+    }
+    cv.morphologyEx(image, dstC4, op, kernel, {x: -1, y: -1}, 1,
+                    Number(controls.morphologyBorderType), color);
+    kernel.delete();
+    return dstC4;
+}
+
+function processVideo() {
+    if (!streaming) return;
+    stats.begin();
+    vc.read(src);
+    let result;
+    switch (controls.filter) {
+        case 'passThrough': result = passThrough(src); break;
+        case 'gray': result = gray(src); break;
+        case 'hsv': result = hsv(src); break;
+        case 'canny': result = canny(src); break;
+        case 'inRange': result = inRange(src); break;
+        case 'threshold': result = threshold(src); break;
+        case 'adaptiveThreshold': result = adaptiveThreshold(src); break;
+        case 'gaussianBlur': result = gaussianBlur(src); break;
+        case 'bilateralFilter': result = bilateralFilter(src); break;
+        case 'medianBlur': result = medianBlur(src); break;
+        case 'sobel': result = sobel(src); break;
+        case 'scharr': result = scharr(src); break;
+        case 'laplacian': result = laplacian(src); break;
+        case 'contours': result = contours(src); break;
+        case 'calcHist': result = calcHist(src); break;
+        case 'equalizeHist': result = equalizeHist(src); break;
+        case 'backprojection': result = backprojection(src); break;
+        case 'erosion': result = erosion(src); break;
+        case 'dilation': result = dilation(src); break;
+        case 'morphology': result = morphology(src); break;
+        default: result = passThrough(src);
+    }
+    cv.imshow('canvasOutput', result);
+    stats.end();
+    lastFilter = controls.filter;
+    requestAnimationFrame(processVideo);
+}
+
+let stats = null;
+
+let filters = {
+    'passThrough': 'Pass Through',
+    'gray': 'Gray',
+    'hsv': 'HSV',
+    'canny': 'Canny Edge Detection',
+    'inRange': 'In Range',
+    'threshold': 'Threshold',
+    'adaptiveThreshold': 'Adaptive Threshold',
+    'gaussianBlur': 'Gaussian Blurring',
+    'medianBlur': 'Median Blurring',
+    'bilateralFilter': 'Bilateral Filtering',
+    'sobel': 'Sobel Derivatives',
+    'scharr': 'Scharr Derivatives',
+    'laplacian': 'Laplacian Derivatives',
+    'contours': 'Contours',
+    'calcHist': 'Calculation',
+    'equalizeHist': 'Equalization',
+    'backprojection': 'Backprojection',
+    'erosion': 'Erosion',
+    'dilation': 'Dilation',
+    'morphology': 'Morphology',
+};
+
+let filterName = document.getElementById('filterName');
+
+let controls;
+
+function initUI() {
+    stats = new Stats();
+    stats.showPanel(0);
+    container.appendChild(stats.domElement);
+    stats.domElement.style.position = 'absolute';
+    stats.domElement.style.right = '0px';
+    stats.domElement.style.top = '0px';
+
+    controls = {
+        filter: 'passThrough',
+        setFilter: function(filter) {
+            this.filter = filter;
+            filterName.innerHTML = filters[filter];
+        },
+        passThrough: function() {
+            this.setFilter('passThrough');
+        },
+        gray: function() {
+            this.setFilter('gray');
+        },
+        hsv: function() {
+            this.setFilter('hsv');
+        },
+        inRange: function() {
+            this.setFilter('inRange');
+        },
+        inRangeLow: 75,
+        inRangeHigh: 150,
+        threshold: function() {
+            this.setFilter('threshold');
+        },
+        thresholdValue: 100,
+        adaptiveThreshold: function() {
+            this.setFilter('adaptiveThreshold');
+        },
+        adaptiveBlockSize: 3,
+        gaussianBlur: function() {
+            this.setFilter('gaussianBlur');
+        },
+        gaussianBlurSize: 7,
+        medianBlur: function() {
+            this.setFilter('medianBlur');
+        },
+        medianBlurSize: 5,
+        bilateralFilter: function() {
+            this.setFilter('bilateralFilter');
+        },
+        bilateralFilterDiameter: 5,
+        bilateralFilterSigma: 75,
+        sobel: function() {
+            this.setFilter('sobel');
+        },
+        sobelSize: 3,
+        scharr: function() {
+            this.setFilter('scharr');
+        },
+        laplacian: function() {
+            this.setFilter('laplacian');
+        },
+        laplacianSize: 3,
+        canny: function() {
+            this.setFilter('canny');
+        },
+        cannyThreshold1: 150,
+        cannyThreshold2: 300,
+        cannyApertureSize: 3,
+        cannyL2Gradient: false,
+        contours: function() {
+            this.setFilter('contours');
+        },
+        contoursMode: cv.RETR_CCOMP,
+        contoursMethod: cv.CHAIN_APPROX_SIMPLE,
+        calcHist: function() {
+            this.setFilter('calcHist');
+        },
+        equalizeHist: function() {
+            this.setFilter('equalizeHist');
+        },
+        backprojection: function() {
+            this.setFilter('backprojection');
+        },
+        backprojectionRangeLow: 0,
+        backprojectionRangeHigh: 150,
+        morphology: function() {
+            this.setFilter('morphology');
+        },
+        morphologyShape: cv.MORPH_RECT,
+        morphologyOp: cv.MORPH_ERODE,
+        morphologySize: 5,
+        morphologyBorderType: cv.BORDER_CONSTANT,
+    };
+
+    let gui = new dat.GUI({autoPlace: false});
+    let guiContainer = document.getElementById('guiContainer');
+    guiContainer.appendChild(gui.domElement);
+
+    let lastFolder = null;
+    function closeLastFolder(folder) {
+        if (lastFolder !== null && lastFolder !== folder) {
+            lastFolder.close();
+        }
+        lastFolder = folder;
+    }
+
+    gui.add(controls, 'passThrough').name(filters['passThrough']).onChange(function() {
+        closeLastFolder(null);
+    });
+
+    let colorConversion = gui.addFolder('Color Conversion');
+    colorConversion.add(controls, 'gray').name(filters['gray']).onChange(function() {
+        closeLastFolder(null);
+    });
+
+    colorConversion.add(controls, 'hsv').name(filters['hsv']).onChange(function() {
+        closeLastFolder(null);
+    });
+
+    let inRange = colorConversion.addFolder(filters['inRange']);
+    inRange.domElement.onclick = function() {
+        closeLastFolder(inRange);
+        controls.inRange();
+    };
+    inRange.add(controls, 'inRangeLow', 0, 255, 1).name('lower boundary');
+    inRange.add(controls, 'inRangeHigh', 0, 255, 1).name('higher boundary');
+
+    // let geometricTransformations = gui.addFolder('Geometric Transformations');
+    // TODO
+
+    let thresholding = gui.addFolder('Thresholding');
+
+    let threshold = thresholding.addFolder(filters['threshold']);
+    threshold.domElement.onclick = function() {
+        closeLastFolder(threshold);
+        controls.threshold();
+    };
+    threshold.add(controls, 'thresholdValue', 0, 200, 1).name('threshold value');
+
+    let adaptiveThreshold = thresholding.addFolder(filters['adaptiveThreshold']);
+    adaptiveThreshold.domElement.onclick = function() {
+        closeLastFolder(adaptiveThreshold);
+        controls.adaptiveThreshold();
+    };
+    adaptiveThreshold.add(
+        controls, 'adaptiveBlockSize', 3, 99, 1).name('block size').onChange(
+        function(value) {
+            if (value % 2 === 0) controls.adaptiveBlockSize = value + 1;
+        });
+
+    let smoothing = gui.addFolder('Smoothing');
+
+    let gaussianBlur = smoothing.addFolder(filters['gaussianBlur']);
+    gaussianBlur.domElement.onclick = function() {
+        closeLastFolder(gaussianBlur);
+        controls.gaussianBlur();
+    };
+    gaussianBlur.add(
+        controls, 'gaussianBlurSize', 7, 99, 1).name('kernel size').onChange(
+        function(value) {
+            if (value % 2 === 0) controls.gaussianBlurSize = value + 1;
+        });
+
+    let medianBlur = smoothing.addFolder(filters['medianBlur']);
+    medianBlur.domElement.onclick = function() {
+        closeLastFolder(medianBlur);
+        controls.medianBlur();
+    };
+    medianBlur.add(
+        controls, 'medianBlurSize', 3, 99, 1).name('kernel size').onChange(
+        function(value) {
+            if (value % 2 === 0) controls.medianBlurSize = value + 1;
+        });
+
+    let bilateralFilter = smoothing.addFolder(filters['bilateralFilter']);
+    bilateralFilter.domElement.onclick = function() {
+        closeLastFolder(bilateralFilter);
+        controls.bilateralFilter();
+    };
+    bilateralFilter.add(controls, 'bilateralFilterDiameter', 1, 15, 1).name('diameter');
+    bilateralFilter.add(controls, 'bilateralFilterSigma', 1, 255, 1).name('sigma');
+
+    let morphology = gui.addFolder('Morphology');
+    morphology.domElement.onclick = function() {
+        closeLastFolder(morphology);
+        controls.morphology();
+    };
+    morphology.add(
+        controls, 'morphologyOp',
+        {'MORPH_ERODE': cv.MORPH_ERODE,
+         'MORPH_DILATE': cv.MORPH_DILATE,
+         'MORPH_OPEN': cv.MORPH_OPEN,
+         'MORPH_CLOSE': cv.MORPH_CLOSE,
+         'MORPH_GRADIENT': cv.MORPH_GRADIENT,
+         'MORPH_TOPHAT': cv.MORPH_TOPHAT,
+         'MORPH_BLACKHAT': cv.MORPH_BLACKHAT}).name('operation');
+    morphology.add(
+        controls, 'morphologyShape',
+        {'MORPH_RECT': cv.MORPH_RECT,
+         'MORPH_CROSS': cv.MORPH_CROSS,
+         'MORPH_ELLIPSE': cv.MORPH_ELLIPSE}).name('shape');
+    morphology.add(
+        controls, 'morphologySize', 1, 15, 1).name('kernel size').onChange(
+        function(value) {
+            if (value % 2 === 0) controls.morphologySize = value + 1;
+        });
+    morphology.add(
+        controls, 'morphologyBorderType',
+        {'BORDER_CONSTANT': cv.BORDER_CONSTANT,
+         'BORDER_REPLICATE': cv.BORDER_REPLICATE,
+         'BORDER_REFLECT': cv.BORDER_REFLECT,
+         'BORDER_REFLECT_101': cv.BORDER_REFLECT_101}).name('border type');
+
+    let gradients = gui.addFolder('Gradients');
+    let sobel = gradients.addFolder(filters['sobel']);
+    sobel.domElement.onclick = function() {
+        closeLastFolder(sobel);
+        controls.sobel();
+    };
+    sobel.add(controls, 'sobelSize', 3, 19, 1).name('kernel size').onChange(function(value) {
+        if (value % 2 === 0) controls.sobelSize = value + 1;
+    });
+
+    gradients.add(controls, 'scharr').name(filters['scharr']).onChange(function() {
+        closeLastFolder(null);
+    });
+
+    let laplacian = gradients.addFolder(filters['laplacian']);
+    laplacian.domElement.onclick = function() {
+        closeLastFolder(laplacian);
+        controls.laplacian();
+    };
+    laplacian.add(
+        controls, 'laplacianSize', 1, 19, 1).name('kernel size').onChange(
+        function(value) {
+            if (value % 2 === 0) controls.laplacianSize = value + 1;
+        });
+
+    let canny = gui.addFolder(filters['canny']);
+    canny.domElement.onclick = function() {
+        closeLastFolder(canny);
+        controls.canny();
+    };
+    canny.add(controls, 'cannyThreshold1', 1, 500, 1).name('threshold1');
+    canny.add(controls, 'cannyThreshold2', 1, 500, 1).name('threshold2');
+    canny.add(controls, 'cannyApertureSize', 3, 7, 1).name('aperture size').onChange(
+        function(value) {
+            if (value % 2 === 0) controls.cannyApertureSize = value + 1;
+        });
+    canny.add(controls, 'cannyL2Gradient').name('l2 gradient');
+
+    let contours = gui.addFolder(filters['contours']);
+    contours.domElement.onclick = function() {
+        closeLastFolder(contours);
+        controls.contours();
+    };
+    contours.add(
+        controls, 'contoursMode',
+        {'RETR_EXTERNAL': cv.RETR_EXTERNAL,
+         'RETR_LIST': cv.RETR_LIST,
+         'RETR_CCOMP': cv.RETR_CCOMP,
+         'RETR_TREE': cv.RETR_TREE}).name('mode');
+    contours.add(
+        controls, 'contoursMethod',
+        {'CHAIN_APPROX_NONE': cv.CHAIN_APPROX_NONE,
+         'CHAIN_APPROX_SIMPLE': cv.CHAIN_APPROX_SIMPLE,
+         'CHAIN_APPROX_TC89_L1': cv.CHAIN_APPROX_TC89_L1,
+         'CHAIN_APPROX_TC89_KCOS': cv.CHAIN_APPROX_TC89_KCOS}).name('method');
+
+    let histograms = gui.addFolder('Histograms');
+    histograms.add(controls, 'calcHist').name(filters['calcHist']).onChange(function() {
+        closeLastFolder(null);
+    });
+    histograms.add(controls, 'equalizeHist').name(filters['equalizeHist']).onChange(function() {
+        closeLastFolder(null);
+    });
+
+    let backprojection = histograms.addFolder(filters['backprojection']);
+    backprojection.domElement.onclick = function() {
+        closeLastFolder(backprojection);
+        controls.backprojection();
+    };
+    backprojection.add(controls, 'backprojectionRangeLow', 0, 255, 1).name('range low');
+    backprojection.add(controls, 'backprojectionRangeHigh', 0, 255, 1).name('range high');
+}
+
+function startCamera() {
+    if (!streaming) {
+        utils.clearError();
+        utils.startCamera(resolution, onVideoStarted, 'videoInput');
+    } else {
+        utils.stopCamera();
+        onVideoStopped();
+    }
+}
+
+function onVideoStarted() {
+    height = video.videoHeight;
+    width = video.videoWidth;
+    video.setAttribute('width', width);
+    video.setAttribute('height', height);
+    streaming = true;
+    vc = new cv.VideoCapture(video);
+    startVideoProcessing();
+}
+
+function stopVideoProcessing() {
+    if (src != null && !src.isDeleted()) src.delete();
+    if (dstC1 != null && !dstC1.isDeleted()) dstC1.delete();
+    if (dstC3 != null && !dstC3.isDeleted()) dstC3.delete();
+    if (dstC4 != null && !dstC4.isDeleted()) dstC4.delete();
+}
+
+function onVideoStopped() {
+    if (!streaming) return;
+    stopVideoProcessing();
+    document.getElementById('canvasOutput').getContext('2d').clearRect(0, 0, width, height);
+    streaming = false;
+}
+
+utils.loadOpenCv(() => {
+    initUI();
+    startCamera();
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_meanshift.html b/doc/js_tutorials/js_assets/js_meanshift.html
new file mode 100644 (file)
index 0000000..9e29002
--- /dev/null
@@ -0,0 +1,170 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>MeanShift Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>MeanShift Example</h2>
+<p>
+    Click the <b>Start/Stop</b> button to start or stop the video.<br>
+    The <b>videoInput</b> is a &lt;video&gt; element used as the meanShift input.
+    The <b>canvasOutput</b> is a &lt;canvas&gt; element used as the meanShift output.<br>
+    The code in the &lt;textarea&gt; will be executed when the video starts.
+    You can modify the code to investigate more.
+</p>
+<div>
+<div class="control"><button id="startAndStop" disabled>Start</button></div>
+<textarea class="code" rows="29" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+</div>
+<p class="err" id="errorMessage"></p>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <video id="videoInput" width="320" height="240" muted loop></video>
+        </td>
+        <td>
+            <canvas id="canvasOutput" width="320" height="240" ></canvas>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">videoInput</div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    </table>
+</div>
+<script src="https://webrtc.github.io/adapter/adapter-5.0.4.js" type="text/javascript"></script>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let video = document.getElementById('videoInput');
+let cap = new cv.VideoCapture(video);
+
+// take the first frame of the video
+let frame = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+cap.read(frame);
+
+// hardcode the initial location of the window
+let trackWindow = new cv.Rect(150, 60, 63, 125);
+
+// set up the ROI for tracking
+let roi = frame.roi(trackWindow);
+let hsvRoi = new cv.Mat();
+cv.cvtColor(roi, hsvRoi, cv.COLOR_RGBA2RGB);
+cv.cvtColor(hsvRoi, hsvRoi, cv.COLOR_RGB2HSV);
+let mask = new cv.Mat();
+let lowScalar = new cv.Scalar(30, 30, 0);
+let highScalar = new cv.Scalar(180, 180, 180);
+let low = new cv.Mat(hsvRoi.rows, hsvRoi.cols, hsvRoi.type(), lowScalar);
+let high = new cv.Mat(hsvRoi.rows, hsvRoi.cols, hsvRoi.type(), highScalar);
+cv.inRange(hsvRoi, low, high, mask);
+let roiHist = new cv.Mat();
+let hsvRoiVec = new cv.MatVector();
+hsvRoiVec.push_back(hsvRoi);
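+// Histogram over the hue channel only; OpenCV stores 8-bit hue in [0, 180).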
+cv.calcHist(hsvRoiVec, [0], mask, roiHist, [180], [0, 180]);
+cv.normalize(roiHist, roiHist, 0, 255, cv.NORM_MINMAX);
+
+// delete mats that are no longer needed.
+roi.delete(); hsvRoi.delete(); mask.delete(); low.delete(); high.delete(); hsvRoiVec.delete();
+
+// Set up the termination criteria: either 10 iterations or a move by at least 1 pt
+let termCrit = new cv.TermCriteria(cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1);
+
+let hsv = new cv.Mat(video.height, video.width, cv.CV_8UC3);
+let dst = new cv.Mat();
+let hsvVec = new cv.MatVector();
+hsvVec.push_back(hsv);
+
+const FPS = 30;
+function processVideo() {
+    try {
+        if (!streaming) {
+            // clean and stop.
+            frame.delete(); dst.delete(); hsvVec.delete(); roiHist.delete(); hsv.delete();
+            return;
+        }
+        let begin = Date.now();
+
+        // start processing.
+        cap.read(frame);
+        cv.cvtColor(frame, hsv, cv.COLOR_RGBA2RGB);
+        cv.cvtColor(hsv, hsv, cv.COLOR_RGB2HSV);
+        cv.calcBackProject(hsvVec, [0], roiHist, dst, [0, 180], 1);
+
+        // Apply meanShift to get the new location. It also returns the
+        // number of iterations meanShift took to converge, which is not
+        // used in this demo.
+        [, trackWindow] = cv.meanShift(dst, trackWindow, termCrit);
+
+        // Draw it on image
+        let [x, y, w, h] = [trackWindow.x, trackWindow.y, trackWindow.width, trackWindow.height];
+        cv.rectangle(frame, new cv.Point(x, y), new cv.Point(x+w, y+h), [255, 0, 0, 255], 2);
+        cv.imshow('canvasOutput', frame);
+
+        // schedule the next one.
+        let delay = 1000/FPS - (Date.now() - begin);
+        setTimeout(processVideo, delay);
+    } catch (err) {
+        utils.printError(err);
+    }
+}
+
+// schedule the first one.
+setTimeout(processVideo, 0);
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+
+let streaming = false;
+let videoInput = document.getElementById('videoInput');
+let startAndStop = document.getElementById('startAndStop');
+let canvasOutput = document.getElementById('canvasOutput');
+let canvasContext = canvasOutput.getContext('2d');
+
+startAndStop.addEventListener('click', () => {
+    if (!streaming) {
+        utils.clearError();
+        videoInput.play().then(() => {
+            onVideoStarted();
+        });
+    } else {
+        videoInput.pause();
+        videoInput.currentTime = 0;
+        onVideoStopped();
+    }
+});
+
+function onVideoStarted() {
+    streaming = true;
+    startAndStop.innerText = 'Stop';
+    videoInput.height = videoInput.width * (videoInput.videoHeight / videoInput.videoWidth);
+    utils.executeCode('codeEditor');
+}
+
+function onVideoStopped() {
+    streaming = false;
+    canvasContext.clearRect(0, 0, canvasOutput.width, canvasOutput.height);
+    startAndStop.innerText = 'Start';
+}
+
+utils.loadOpenCv(() => {
+    videoInput.addEventListener('canplay', () => {
+        startAndStop.removeAttribute('disabled');
+    });
+    videoInput.src = 'cup.mp4';
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_morphological_ops_blackHat.html b/doc/js_tutorials/js_assets/js_morphological_ops_blackHat.html
new file mode 100644 (file)
index 0000000..052520a
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Black Hat Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Black Hat Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB);
+let dst = new cv.Mat();
+let M = cv.Mat.ones(53, 53, cv.CV_8U);
+// You can try different parameters
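+// Black hat is the difference between the closing of the image and the image
+// itself; it highlights dark features smaller than the 53x53 kernel.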
+cv.morphologyEx(src, dst, cv.MORPH_BLACKHAT, M);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_morphological_ops_closing.html b/doc/js_tutorials/js_assets/js_morphological_ops_closing.html
new file mode 100644 (file)
index 0000000..9b364b1
--- /dev/null
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Closing Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Closing Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let M = cv.Mat.ones(5, 5, cv.CV_8U);
+// You can try different parameters
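+// Closing (dilation followed by erosion) is useful for closing small holes
+// inside foreground objects.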
+cv.morphologyEx(src, dst, cv.MORPH_CLOSE, M);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_morphological_ops_dilate.html b/doc/js_tutorials/js_assets/js_morphological_ops_dilate.html
new file mode 100644 (file)
index 0000000..ca95b02
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Dilate Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Dilate Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let M = cv.Mat.ones(5, 5, cv.CV_8U);
+let anchor = new cv.Point(-1, -1);
+// You can try different parameters
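+// anchor (-1, -1) places the anchor at the kernel center; with
+// cv.BORDER_CONSTANT, the default border value makes pixels outside
+// the image not affect the result.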
+cv.dilate(src, dst, M, anchor, 1, cv.BORDER_CONSTANT, cv.morphologyDefaultBorderValue());
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_morphological_ops_erode.html b/doc/js_tutorials/js_assets/js_morphological_ops_erode.html
new file mode 100644 (file)
index 0000000..5208155
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Erode Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Erode Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let M = cv.Mat.ones(5, 5, cv.CV_8U);
+let anchor = new cv.Point(-1, -1);
+// You can try different parameters
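+// Erosion replaces each pixel with the local minimum over the kernel
+// footprint, shrinking bright regions.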
+cv.erode(src, dst, M, anchor, 1, cv.BORDER_CONSTANT, cv.morphologyDefaultBorderValue());
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_morphological_ops_getStructuringElement.html b/doc/js_tutorials/js_assets/js_morphological_ops_getStructuringElement.html
new file mode 100644 (file)
index 0000000..348eefc
--- /dev/null
@@ -0,0 +1,71 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Get Structuring Element Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Get Structuring Element Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB);
+let dst = new cv.Mat();
+let ksize = new cv.Size(5, 5);
+// You can try different parameters
+let M = cv.getStructuringElement(cv.MORPH_CROSS, ksize);
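+// getStructuringElement builds the kernel: MORPH_CROSS here; MORPH_RECT and
+// MORPH_ELLIPSE are the other available shapes.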
+cv.morphologyEx(src, dst, cv.MORPH_GRADIENT, M);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_morphological_ops_gradient.html b/doc/js_tutorials/js_assets/js_morphological_ops_gradient.html
new file mode 100644 (file)
index 0000000..060d5d4
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Gradient Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Gradient Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB);
+let dst = new cv.Mat();
+let M = cv.Mat.ones(5, 5, cv.CV_8U);
+// You can try different parameters
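+// The morphological gradient (dilation minus erosion) yields an outline of
+// the object.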
+cv.morphologyEx(src, dst, cv.MORPH_GRADIENT, M);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_morphological_ops_opening.html b/doc/js_tutorials/js_assets/js_morphological_ops_opening.html
new file mode 100644 (file)
index 0000000..0e1a961
--- /dev/null
@@ -0,0 +1,70 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Opening Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Opening Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let M = cv.Mat.ones(5, 5, cv.CV_8U);
+let anchor = new cv.Point(-1, -1);
+// You can try different parameters
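+// Opening (erosion followed by dilation) removes small bright noise.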
+cv.morphologyEx(src, dst, cv.MORPH_OPEN, M, anchor, 1,
+                cv.BORDER_CONSTANT, cv.morphologyDefaultBorderValue());
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_morphological_ops_topHat.html b/doc/js_tutorials/js_assets/js_morphological_ops_topHat.html
new file mode 100644 (file)
index 0000000..26e6959
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Top Hat Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Top Hat Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to investigate more.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB);
+let dst = new cv.Mat();
+let M = cv.Mat.ones(9, 9, cv.CV_8U);
+// You can try different parameters
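+// Top hat is the difference between the image and its opening; it highlights
+// bright features smaller than the 9x9 kernel.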
+cv.morphologyEx(src, dst, cv.MORPH_TOPHAT, M);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('shape.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_optical_flow_dense.html b/doc/js_tutorials/js_assets/js_optical_flow_dense.html
new file mode 100644 (file)
index 0000000..83bd597
--- /dev/null
@@ -0,0 +1,163 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Dense Optical Flow Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Dense Optical Flow Example</h2>
+<p>
+    Click the <b>Start/Stop</b> button to start or stop the video.<br>
+    The <b>videoInput</b> is a &lt;video&gt; element used as input.
+    The <b>canvasOutput</b> is a &lt;canvas&gt; element used as output.<br>
+    The algorithm gives a 2-channel array of optical flow vectors (u, v). We compute their
+    magnitude and direction and color-code the result for better visualization:
+    direction maps to the hue channel and magnitude to the value channel.<br>
+    The code in the &lt;textarea&gt; will be executed when the video starts.<br>
+    You can modify the code to investigate more.
+</p>
+<div>
+<div class="control"><button id="startAndStop" disabled>Start</button></div>
+<textarea class="code" rows="29" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+</div>
+<p class="err" id="errorMessage"></p>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <video id="videoInput" width="320" height="240" muted></video>
+        </td>
+        <td>
+            <canvas id="canvasOutput" width="320" height="240" ></canvas>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">videoInput</div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    </table>
+</div>
+<script src="https://webrtc.github.io/adapter/adapter-5.0.4.js" type="text/javascript"></script>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let video = document.getElementById('videoInput');
+let cap = new cv.VideoCapture(video);
+
+// take first frame of the video
+let frame1 = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+cap.read(frame1);
+
+let prvs = new cv.Mat();
+cv.cvtColor(frame1, prvs, cv.COLOR_RGBA2GRAY);
+frame1.delete();
+let hsv = new cv.Mat();
+let hsv0 = new cv.Mat(video.height, video.width, cv.CV_8UC1);
+let hsv1 = new cv.Mat(video.height, video.width, cv.CV_8UC1, new cv.Scalar(255));
+let hsv2 = new cv.Mat(video.height, video.width, cv.CV_8UC1);
+let hsvVec = new cv.MatVector();
+hsvVec.push_back(hsv0); hsvVec.push_back(hsv1); hsvVec.push_back(hsv2);
+
+let frame2 = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+let next = new cv.Mat(video.height, video.width, cv.CV_8UC1);
+let flow = new cv.Mat(video.height, video.width, cv.CV_32FC2);
+let flowVec = new cv.MatVector();
+let mag = new cv.Mat(video.height, video.width, cv.CV_32FC1);
+let ang = new cv.Mat(video.height, video.width, cv.CV_32FC1);
+let rgb = new cv.Mat(video.height, video.width, cv.CV_8UC3);
+
+const FPS = 30;
+function processVideo() {
+    try {
+        if (!streaming) {
+            // clean and stop.
+            prvs.delete(); hsv.delete(); hsv0.delete(); hsv1.delete(); hsv2.delete();
+            hsvVec.delete(); frame2.delete(); flow.delete(); flowVec.delete(); next.delete();
+            mag.delete(); ang.delete(); rgb.delete();
+            return;
+        }
+        let begin = Date.now();
+
+        // start processing.
+        cap.read(frame2);
+        cv.cvtColor(frame2, next, cv.COLOR_RGBA2GRAY);
+        cv.calcOpticalFlowFarneback(prvs, next, flow, 0.5, 3, 15, 3, 5, 1.2, 0);
+        cv.split(flow, flowVec);
+        let u = flowVec.get(0);
+        let v = flowVec.get(1);
+        cv.cartToPolar(u, v, mag, ang);
+        u.delete(); v.delete();
+        ang.convertTo(hsv0, cv.CV_8UC1, 180/Math.PI/2);
+        cv.normalize(mag, hsv2, 0, 255, cv.NORM_MINMAX, cv.CV_8UC1);
+        cv.merge(hsvVec, hsv);
+        cv.cvtColor(hsv, rgb, cv.COLOR_HSV2RGB);
+        cv.imshow('canvasOutput', rgb);
+        next.copyTo(prvs);
+
+        // schedule the next one.
+        let delay = 1000/FPS - (Date.now() - begin);
+        setTimeout(processVideo, delay);
+    } catch (err) {
+        utils.printError(err);
+    }
+}
+
+// schedule the first one.
+setTimeout(processVideo, 0);
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+
+let streaming = false;
+let videoInput = document.getElementById('videoInput');
+let startAndStop = document.getElementById('startAndStop');
+
+startAndStop.addEventListener('click', () => {
+    if (!streaming) {
+        utils.clearError();
+        videoInput.play().then(() => {
+            onVideoStarted();
+        });
+    } else {
+        videoInput.pause();
+        videoInput.currentTime = 0;
+        onVideoStopped();
+    }
+});
+
+function onVideoStarted() {
+    streaming = true;
+    startAndStop.innerText = 'Stop';
+    videoInput.height = videoInput.width * (videoInput.videoHeight / videoInput.videoWidth);
+    utils.executeCode('codeEditor');
+}
+
+function onVideoStopped() {
+    streaming = false;
+    startAndStop.innerText = 'Start';
+}
+
+videoInput.addEventListener('ended', () => {
+    onVideoStopped();
+});
+
+utils.loadOpenCv(() => {
+    videoInput.addEventListener('canplay', () => {
+        startAndStop.removeAttribute('disabled');
+    });
+    videoInput.src = 'box.mp4';
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_optical_flow_lucas_kanade.html b/doc/js_tutorials/js_assets/js_optical_flow_lucas_kanade.html
new file mode 100644 (file)
index 0000000..91f4d21
--- /dev/null
@@ -0,0 +1,190 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Lucas-Kanade Optical Flow Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Lucas-Kanade Optical Flow Example</h2>
+<p>
+    Click the <b>Start/Stop</b> button to start or stop the video.<br>
+    The <b>videoInput</b> is a &lt;video&gt; element used as input.
+    The <b>canvasOutput</b> is a &lt;canvas&gt; element used as output.<br>
+    To decide which points to track, we use <b>cv.goodFeaturesToTrack()</b>. We take the first frame, detect some Shi-Tomasi corner points in it, and then iteratively track those points using <b>cv.calcOpticalFlowPyrLK()</b>.<br>
+    The code in the &lt;textarea&gt; will be executed when the video is started.<br>
+    You can modify the code to experiment further.
+</p>
+<div>
+<div class="control"><button id="startAndStop" disabled>Start</button></div>
+<textarea class="code" rows="29" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+</div>
+<p class="err" id="errorMessage"></p>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <video id="videoInput" width="320" height="240" muted></video>
+        </td>
+        <td>
+            <canvas id="canvasOutput" width="320" height="240" ></canvas>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">videoInput</div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    </table>
+</div>
+<script src="https://webrtc.github.io/adapter/adapter-5.0.4.js" type="text/javascript"></script>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let video = document.getElementById('videoInput');
+let cap = new cv.VideoCapture(video);
+
+// parameters for ShiTomasi corner detection
+let [maxCorners, qualityLevel, minDistance, blockSize] = [30, 0.3, 7, 7];
+
+// parameters for lucas kanade optical flow
+let winSize = new cv.Size(15, 15);
+let maxLevel = 2;
+let criteria = new cv.TermCriteria(cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03);
+
+// create some random colors
+let color = [];
+for (let i = 0; i < maxCorners; i++) {
+    color.push(new cv.Scalar(Math.floor(Math.random()*255), Math.floor(Math.random()*255),
+                             Math.floor(Math.random()*255), 255));
+}
+
+// take first frame and find corners in it
+let oldFrame = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+cap.read(oldFrame);
+let oldGray = new cv.Mat();
+cv.cvtColor(oldFrame, oldGray, cv.COLOR_RGBA2GRAY);
+let p0 = new cv.Mat();
+let none = new cv.Mat();
+cv.goodFeaturesToTrack(oldGray, p0, maxCorners, qualityLevel, minDistance, none, blockSize);
+
+// Create a mask image for drawing purposes
+let zeroEle = new cv.Scalar(0, 0, 0, 255);
+let mask = new cv.Mat(oldFrame.rows, oldFrame.cols, oldFrame.type(), zeroEle);
+
+let frame = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+let frameGray = new cv.Mat();
+let p1 = new cv.Mat();
+let st = new cv.Mat();
+let err = new cv.Mat();
+
+const FPS = 30;
+function processVideo() {
+    try {
+        if (!streaming) {
+            // clean and stop.
+            frame.delete(); frameGray.delete(); oldGray.delete(); p0.delete(); p1.delete();
+            st.delete(); err.delete(); mask.delete();
+            return;
+        }
+        let begin = Date.now();
+
+        // start processing.
+        cap.read(frame);
+        cv.cvtColor(frame, frameGray, cv.COLOR_RGBA2GRAY);
+
+        // calculate optical flow
+        cv.calcOpticalFlowPyrLK(oldGray, frameGray, p0, p1, st, err, winSize, maxLevel, criteria);
+
+        // select good points
+        let goodNew = [];
+        let goodOld = [];
+        for (let i = 0; i < st.rows; i++) {
+            if (st.data[i] === 1) {
+                goodNew.push(new cv.Point(p1.data32F[i*2], p1.data32F[i*2+1]));
+                goodOld.push(new cv.Point(p0.data32F[i*2], p0.data32F[i*2+1]));
+            }
+        }
+
+        // draw the tracks
+        for (let i = 0; i < goodNew.length; i++) {
+            cv.line(mask, goodNew[i], goodOld[i], color[i], 2);
+            cv.circle(frame, goodNew[i], 5, color[i], -1);
+        }
+        cv.add(frame, mask, frame);
+
+        cv.imshow('canvasOutput', frame);
+
+        // now update the previous frame and previous points
+        frameGray.copyTo(oldGray);
+        p0.delete(); p0 = null;
+        p0 = new cv.Mat(goodNew.length, 1, cv.CV_32FC2);
+        for (let i = 0; i < goodNew.length; i++) {
+            p0.data32F[i*2] = goodNew[i].x;
+            p0.data32F[i*2+1] = goodNew[i].y;
+        }
+
+        // schedule the next one.
+        let delay = 1000/FPS - (Date.now() - begin);
+        setTimeout(processVideo, delay);
+    } catch (err) {
+        utils.printError(err);
+    }
+}
+
+// schedule the first one.
+setTimeout(processVideo, 0);
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+
+let streaming = false;
+let videoInput = document.getElementById('videoInput');
+let startAndStop = document.getElementById('startAndStop');
+
+startAndStop.addEventListener('click', () => {
+    if (!streaming) {
+        utils.clearError();
+        videoInput.play().then(() => {
+            onVideoStarted();
+        });
+    } else {
+        videoInput.pause();
+        videoInput.currentTime = 0;
+        onVideoStopped();
+    }
+});
+
+function onVideoStarted() {
+    streaming = true;
+    startAndStop.innerText = 'Stop';
+    videoInput.height = videoInput.width * (videoInput.videoHeight / videoInput.videoWidth);
+    utils.executeCode('codeEditor');
+}
+
+function onVideoStopped() {
+    streaming = false;
+    startAndStop.innerText = 'Start';
+}
+
+videoInput.addEventListener('ended', () => {
+    onVideoStopped();
+});
+
+utils.loadOpenCv(() => {
+    videoInput.addEventListener('canplay', () => {
+        startAndStop.removeAttribute('disabled');
+    });
+    videoInput.src = 'box.mp4';
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_pyramids_pyrDown.html b/doc/js_tutorials/js_assets/js_pyramids_pyrDown.html
new file mode 100644 (file)
index 0000000..6db4c02
--- /dev/null
@@ -0,0 +1,67 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image PyrDown Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image PyrDown Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// You can try different parameters
+cv.pyrDown(src, dst, new cv.Size(0, 0), cv.BORDER_DEFAULT);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_pyramids_pyrUp.html b/doc/js_tutorials/js_assets/js_pyramids_pyrUp.html
new file mode 100644 (file)
index 0000000..4f69903
--- /dev/null
@@ -0,0 +1,67 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image PyrUp Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image PyrUp Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// You can try different parameters
+cv.pyrUp(src, dst, new cv.Size(0, 0), cv.BORDER_DEFAULT);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_setup_usage.html b/doc/js_tutorials/js_assets/js_setup_usage.html
new file mode 100644 (file)
index 0000000..4f00dc3
--- /dev/null
@@ -0,0 +1,60 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Hello OpenCV.js</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Hello OpenCV.js</h2>
+<p id="status">OpenCV.js is loading...</p>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <img id="imageSrc" alt="No Image" class="small" />
+        </td>
+        <td>
+            <canvas id="canvasOutput" class="small" height="300px"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">imageSrc <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script type="text/javascript">
+let imgElement = document.getElementById('imageSrc');
+let inputElement = document.getElementById('fileInput');
+inputElement.addEventListener('change', (e) => {
+    imgElement.src = URL.createObjectURL(e.target.files[0]);
+}, false);
+
+imgElement.onload = function() {
+    let mat = cv.imread(imgElement);
+    cv.imshow('canvasOutput', mat);
+    mat.delete();
+};
+
+function onOpenCvReady() { // eslint-disable-line no-unused-vars
+    document.getElementById('status').innerHTML = '<b>OpenCV.js is ready</b>. ' +
+        'You can upload an image.<br>' +
+        'The <b>imageSrc</b> is an &lt;img&gt; element used as cv.Mat input. ' +
+        'The <b>canvasOutput</b> is a &lt;canvas&gt; element used as cv.Mat output.';
+}
+
+function onOpenCvError() { // eslint-disable-line no-unused-vars
+    let element = document.getElementById('status');
+    element.setAttribute('class', 'err');
+    element.innerHTML = 'Failed to load opencv.js';
+}
+</script>
+<script async src="opencv.js" type="text/javascript" onload="onOpenCvReady();" onerror="onOpenCvError();"></script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html b/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html
new file mode 100644 (file)
index 0000000..ad2bb54
--- /dev/null
@@ -0,0 +1,89 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Template Match Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Template Match Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>imageCanvasInput</b>, <b>templateCanvasInput</b>
+    and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="imageCanvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">imageCanvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <canvas id="templateCanvasInput"></canvas>
+        </td>
+        <td>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">templateCanvasInput <input type="file" id="templateFileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('imageCanvasInput');
+let templ = cv.imread('templateCanvasInput');
+let dst = new cv.Mat();
+let mask = new cv.Mat();
+cv.matchTemplate(src, templ, dst, cv.TM_CCOEFF, mask);
+let result = cv.minMaxLoc(dst, mask);
+let maxPoint = result.maxLoc;
+let color = new cv.Scalar(255, 0, 0, 255);
+let point = new cv.Point(maxPoint.x + templ.cols, maxPoint.y + templ.rows);
+cv.rectangle(src, maxPoint, point, color, 2, cv.LINE_8, 0);
+cv.imshow('canvasOutput', src);
+src.delete(); dst.delete(); mask.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'imageCanvasInput');
+utils.loadImageToCanvas('lenaFace.png', 'templateCanvasInput');
+utils.addFileInputHandler('fileInput', 'imageCanvasInput');
+utils.addFileInputHandler('templateFileInput', 'templateCanvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_thresholding_adaptiveThreshold.html b/doc/js_tutorials/js_assets/js_thresholding_adaptiveThreshold.html
new file mode 100644 (file)
index 0000000..0ac1efb
--- /dev/null
@@ -0,0 +1,69 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Adaptive Threshold Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Adaptive Threshold Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+// You can try different parameters
+cv.adaptiveThreshold(src, dst, 200, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, 3, 2);
+cv.imshow('canvasOutput', dst);
+src.delete();
+dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_thresholding_threshold.html b/doc/js_tutorials/js_assets/js_thresholding_threshold.html
new file mode 100644 (file)
index 0000000..67d74b0
--- /dev/null
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Threshold Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Threshold Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// You can try different parameters
+cv.threshold(src, dst, 177, 200, cv.THRESH_BINARY);
+cv.imshow('canvasOutput', dst);
+src.delete();
+dst.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('lena.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_trackbar.html b/doc/js_tutorials/js_assets/js_trackbar.html
new file mode 100644 (file)
index 0000000..84e21b9
--- /dev/null
@@ -0,0 +1,89 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Trackbar Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Trackbar Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput1</b>, <b>canvasInput2</b> and <b>canvasOutput</b> have been prepared.<br>
+    The code in the &lt;textarea&gt; will be executed when the value of the &lt;input&gt; element named <b>trackbar</b> changes.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<textarea class="code" rows="12" cols="80" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <b>trackbar</b>
+    <input type="range" id="trackbar" disabled value="50" min="0" max="100" step="1">
+    <label id="weightValue" ></label>
+    <div>
+        <table cellpadding="0" cellspacing="0" width="0" border="0">
+        <tr>
+            <td>
+                <canvas id="canvasInput1" class="small"></canvas>
+            </td>
+            <td>
+                <canvas id="canvasInput2" class="small"></canvas>
+            </td>
+            <td>
+                <canvas id="canvasOutput" class="small"></canvas>
+            </td>
+        </tr>
+        <tr>
+            <td>
+                <div class="caption">canvasInput1</div>
+            </td>
+            <td>
+                <div class="caption">canvasInput2</div>
+            </td>
+            <td>
+                <div class="caption">canvasOutput</div>
+            </td>
+        </tr>
+        </table>
+    </div>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let trackbar = document.getElementById('trackbar');
+let alpha = trackbar.value/trackbar.max;
+let beta = 1.0 - alpha;
+let src1 = cv.imread('canvasInput1');
+let src2 = cv.imread('canvasInput2');
+let dst = new cv.Mat();
+cv.addWeighted(src1, alpha, src2, beta, 0.0, dst, -1);
+cv.imshow('canvasOutput', dst);
+dst.delete();
+src1.delete();
+src2.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('apple.jpg', 'canvasInput1');
+utils.loadImageToCanvas('orange.jpg', 'canvasInput2');
+
+let trackbar = document.getElementById('trackbar');
+trackbar.addEventListener('input', () => {
+    utils.executeCode('codeEditor');
+});
+
+let weightValue = document.getElementById('weightValue');
+weightValue.innerText = trackbar.value;
+trackbar.addEventListener('input', () => {
+    weightValue.innerText = trackbar.value;
+});
+
+utils.loadOpenCv(() => {
+    trackbar.removeAttribute('disabled');
+    utils.executeCode('codeEditor');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_video_display.html b/doc/js_tutorials/js_assets/js_video_display.html
new file mode 100644 (file)
index 0000000..f59c380
--- /dev/null
@@ -0,0 +1,120 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Video Capture Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Video Capture Example</h2>
+<p>
+    Click the <b>Start/Stop</b> button to start or stop the camera capture.<br>
+    The <b>videoInput</b> is a &lt;video&gt; element used as OpenCV.js input.
+    The <b>canvasOutput</b> is a &lt;canvas&gt; element used as OpenCV.js output.<br>
+    The code in the &lt;textarea&gt; will be executed when the video is started.
+    You can modify the code to experiment further.
+</p>
+<div>
+<div class="control"><button id="startAndStop" disabled>Start</button></div>
+<textarea class="code" rows="29" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+</div>
+<p class="err" id="errorMessage"></p>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <video id="videoInput" width=320 height=240></video>
+        </td>
+        <td>
+            <canvas id="canvasOutput" width=320 height=240></canvas>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">videoInput</div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+        <td></td>
+        <td></td>
+    </tr>
+    </table>
+</div>
+<script src="https://webrtc.github.io/adapter/adapter-5.0.4.js" type="text/javascript"></script>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let video = document.getElementById('videoInput');
+let src = new cv.Mat(video.height, video.width, cv.CV_8UC4);
+let dst = new cv.Mat(video.height, video.width, cv.CV_8UC1);
+let cap = new cv.VideoCapture(video);
+
+const FPS = 30;
+function processVideo() {
+    try {
+        if (!streaming) {
+            // clean and stop.
+            src.delete();
+            dst.delete();
+            return;
+        }
+        let begin = Date.now();
+        // start processing.
+        cap.read(src);
+        cv.cvtColor(src, dst, cv.COLOR_RGBA2GRAY);
+        cv.imshow('canvasOutput', dst);
+        // schedule the next one.
+        let delay = 1000/FPS - (Date.now() - begin);
+        setTimeout(processVideo, delay);
+    } catch (err) {
+        utils.printError(err);
+    }
+}
+
+// schedule the first one.
+setTimeout(processVideo, 0);
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+
+let streaming = false;
+let videoInput = document.getElementById('videoInput');
+let startAndStop = document.getElementById('startAndStop');
+let canvasOutput = document.getElementById('canvasOutput');
+let canvasContext = canvasOutput.getContext('2d');
+
+startAndStop.addEventListener('click', () => {
+    if (!streaming) {
+        utils.clearError();
+        utils.startCamera('qvga', onVideoStarted, 'videoInput');
+    } else {
+        utils.stopCamera();
+        onVideoStopped();
+    }
+});
+
+function onVideoStarted() {
+    streaming = true;
+    startAndStop.innerText = 'Stop';
+    videoInput.width = videoInput.videoWidth;
+    videoInput.height = videoInput.videoHeight;
+    utils.executeCode('codeEditor');
+}
+
+function onVideoStopped() {
+    streaming = false;
+    canvasContext.clearRect(0, 0, canvasOutput.width, canvasOutput.height);
+    startAndStop.innerText = 'Start';
+}
+
+utils.loadOpenCv(() => {
+    startAndStop.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_watershed_background.html b/doc/js_tutorials/js_assets/js_watershed_background.html
new file mode 100644 (file)
index 0000000..aa99016
--- /dev/null
@@ -0,0 +1,77 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Background Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Background Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let gray = new cv.Mat();
+let opening = new cv.Mat();
+let coinsBg = new cv.Mat();
+cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(gray, gray, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU);
+
+// get background
+let M = cv.Mat.ones(3, 3, cv.CV_8U);
+cv.erode(gray, gray, M);
+cv.dilate(gray, opening, M);
+cv.dilate(opening, coinsBg, M, new cv.Point(-1, -1), 3);
+
+cv.imshow('canvasOutput', coinsBg);
+src.delete(); dst.delete(); gray.delete(); opening.delete(); coinsBg.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('coins.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_watershed_distanceTransform.html b/doc/js_tutorials/js_assets/js_watershed_distanceTransform.html
new file mode 100644 (file)
index 0000000..7204706
--- /dev/null
@@ -0,0 +1,82 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Distance Transform Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Distance Transform Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let gray = new cv.Mat();
+let opening = new cv.Mat();
+let coinsBg = new cv.Mat();
+let coinsFg = new cv.Mat();
+let distTrans = new cv.Mat();
+cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(gray, gray, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU);
+let M = cv.Mat.ones(3, 3, cv.CV_8U);
+cv.erode(gray, gray, M);
+cv.dilate(gray, opening, M);
+cv.dilate(opening, coinsBg, M, new cv.Point(-1, -1), 3);
+
+// distance transform
+cv.distanceTransform(opening, distTrans, cv.DIST_L2, 5);
+cv.normalize(distTrans, distTrans, 1, 0, cv.NORM_INF);
+
+cv.imshow('canvasOutput', distTrans);
+src.delete(); dst.delete(); gray.delete(); opening.delete();
+coinsBg.delete(); coinsFg.delete(); distTrans.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('coins.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_watershed_foreground.html b/doc/js_tutorials/js_assets/js_watershed_foreground.html
new file mode 100644 (file)
index 0000000..e9a3418
--- /dev/null
@@ -0,0 +1,83 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Foreground Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Foreground Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let gray = new cv.Mat();
+let opening = new cv.Mat();
+let coinsBg = new cv.Mat();
+let coinsFg = new cv.Mat();
+let distTrans = new cv.Mat();
+cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(gray, gray, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU);
+let M = cv.Mat.ones(3, 3, cv.CV_8U);
+cv.erode(gray, gray, M);
+cv.dilate(gray, opening, M);
+cv.dilate(opening, coinsBg, M, new cv.Point(-1, -1), 3);
+cv.distanceTransform(opening, distTrans, cv.DIST_L2, 5);
+cv.normalize(distTrans, distTrans, 1, 0, cv.NORM_INF);
+
+// get foreground
+cv.threshold(distTrans, coinsFg, 0.7 * 1, 255, cv.THRESH_BINARY);
+
+cv.imshow('canvasOutput', coinsFg);
+src.delete(); dst.delete(); gray.delete(); opening.delete();
+coinsBg.delete(); coinsFg.delete(); distTrans.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('coins.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_watershed_threshold.html b/doc/js_tutorials/js_assets/js_watershed_threshold.html
new file mode 100644 (file)
index 0000000..ed212a0
--- /dev/null
@@ -0,0 +1,71 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Threshold Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Threshold Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let gray = new cv.Mat();
+
+// gray and threshold image
+cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(gray, gray, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU);
+
+cv.imshow('canvasOutput', gray);
+src.delete(); dst.delete(); gray.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('coins.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/js_watershed_watershed.html b/doc/js_tutorials/js_assets/js_watershed_watershed.html
new file mode 100644 (file)
index 0000000..f68c0fc
--- /dev/null
@@ -0,0 +1,110 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Image Watershed Example</title>
+<link href="js_example_style.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<h2>Image Watershed Example</h2>
+<p>
+    &lt;canvas&gt; elements named <b>canvasInput</b> and <b>canvasOutput</b> have been prepared.<br>
+    Click the <b>Try it</b> button to see the result. You can choose another image.<br>
+    You can change the code in the &lt;textarea&gt; to experiment further.
+</p>
+<div>
+<div class="control"><button id="tryIt" disabled>Try it</button></div>
+<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false">
+</textarea>
+<p class="err" id="errorMessage"></p>
+</div>
+<div>
+    <table cellpadding="0" cellspacing="0" width="0" border="0">
+    <tr>
+        <td>
+            <canvas id="canvasInput"></canvas>
+        </td>
+        <td>
+            <canvas id="canvasOutput"></canvas>
+        </td>
+    </tr>
+    <tr>
+        <td>
+            <div class="caption">canvasInput <input type="file" id="fileInput" name="file" accept="image/*" /></div>
+        </td>
+        <td>
+            <div class="caption">canvasOutput</div>
+        </td>
+    </tr>
+    </table>
+</div>
+<script src="utils.js" type="text/javascript"></script>
+<script id="codeSnippet" type="text/code-snippet">
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+let gray = new cv.Mat();
+let opening = new cv.Mat();
+let coinsBg = new cv.Mat();
+let coinsFg = new cv.Mat();
+let distTrans = new cv.Mat();
+let unknown = new cv.Mat();
+let markers = new cv.Mat();
+// gray and threshold image
+cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(gray, gray, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU);
+// get background
+let M = cv.Mat.ones(3, 3, cv.CV_8U);
+cv.erode(gray, gray, M);
+cv.dilate(gray, opening, M);
+cv.dilate(opening, coinsBg, M, new cv.Point(-1, -1), 3);
+// distance transform
+cv.distanceTransform(opening, distTrans, cv.DIST_L2, 5);
+cv.normalize(distTrans, distTrans, 1, 0, cv.NORM_INF);
+// get foreground
+cv.threshold(distTrans, coinsFg, 0.7 * 1, 255, cv.THRESH_BINARY);
+coinsFg.convertTo(coinsFg, cv.CV_8U, 1, 0);
+cv.subtract(coinsBg, coinsFg, unknown);
+// get connected components markers
+cv.connectedComponents(coinsFg, markers);
+for (let i = 0; i < markers.rows; i++) {
+    for (let j = 0; j < markers.cols; j++) {
+        markers.intPtr(i, j)[0] = markers.ucharPtr(i, j)[0] + 1;
+        if (unknown.ucharPtr(i, j)[0] == 255) {
+            markers.intPtr(i, j)[0] = 0;
+        }
+    }
+}
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB, 0);
+cv.watershed(src, markers);
+// draw barriers
+for (let i = 0; i < markers.rows; i++) {
+    for (let j = 0; j < markers.cols; j++) {
+        if (markers.intPtr(i, j)[0] == -1) {
+            src.ucharPtr(i, j)[0] = 255; // R
+            src.ucharPtr(i, j)[1] = 0; // G
+            src.ucharPtr(i, j)[2] = 0; // B
+        }
+    }
+}
+cv.imshow('canvasOutput', src);
+src.delete(); dst.delete(); gray.delete(); opening.delete(); coinsBg.delete();
+coinsFg.delete(); distTrans.delete(); unknown.delete(); markers.delete(); M.delete();
+</script>
+<script type="text/javascript">
+let utils = new Utils('errorMessage');
+
+utils.loadCode('codeSnippet', 'codeEditor');
+utils.loadImageToCanvas('coins.jpg', 'canvasInput');
+utils.addFileInputHandler('fileInput', 'canvasInput');
+
+let tryIt = document.getElementById('tryIt');
+tryIt.addEventListener('click', () => {
+    utils.executeCode('codeEditor');
+});
+
+utils.loadOpenCv(() => {
+    tryIt.removeAttribute('disabled');
+});
+</script>
+</body>
+</html>
diff --git a/doc/js_tutorials/js_assets/lena.jpg b/doc/js_tutorials/js_assets/lena.jpg
new file mode 100644 (file)
index 0000000..1e544aa
Binary files /dev/null and b/doc/js_tutorials/js_assets/lena.jpg differ
diff --git a/doc/js_tutorials/js_assets/lenaFace.png b/doc/js_tutorials/js_assets/lenaFace.png
new file mode 100644 (file)
index 0000000..d59376b
Binary files /dev/null and b/doc/js_tutorials/js_assets/lenaFace.png differ
diff --git a/doc/js_tutorials/js_assets/opencv_logo.jpg b/doc/js_tutorials/js_assets/opencv_logo.jpg
new file mode 100644 (file)
index 0000000..a2854e1
Binary files /dev/null and b/doc/js_tutorials/js_assets/opencv_logo.jpg differ
diff --git a/doc/js_tutorials/js_assets/orange.jpg b/doc/js_tutorials/js_assets/orange.jpg
new file mode 100644 (file)
index 0000000..1566afb
Binary files /dev/null and b/doc/js_tutorials/js_assets/orange.jpg differ
diff --git a/doc/js_tutorials/js_assets/shape.jpg b/doc/js_tutorials/js_assets/shape.jpg
new file mode 100644 (file)
index 0000000..a4acdcc
Binary files /dev/null and b/doc/js_tutorials/js_assets/shape.jpg differ
diff --git a/doc/js_tutorials/js_assets/utils.js b/doc/js_tutorials/js_assets/utils.js
new file mode 100644 (file)
index 0000000..8e56396
--- /dev/null
@@ -0,0 +1,153 @@
+function Utils(errorOutputId) { // eslint-disable-line no-unused-vars
+    let self = this;
+    this.errorOutput = document.getElementById(errorOutputId);
+
+    const OPENCV_URL = 'opencv.js';
+    this.loadOpenCv = function(onloadCallback) {
+        let script = document.createElement('script');
+        script.setAttribute('async', '');
+        script.setAttribute('type', 'text/javascript');
+        script.addEventListener('load', () => {
+            console.log(cv.getBuildInformation());
+            onloadCallback();
+        });
+        script.addEventListener('error', () => {
+            self.printError('Failed to load ' + OPENCV_URL);
+        });
+        script.src = OPENCV_URL;
+        let node = document.getElementsByTagName('script')[0];
+        node.parentNode.insertBefore(script, node);
+    };
+
+    this.createFileFromUrl = function(path, url, callback) {
+        let request = new XMLHttpRequest();
+        request.open('GET', url, true);
+        request.responseType = 'arraybuffer';
+        request.onload = function(ev) {
+            if (request.readyState === 4) {
+                if (request.status === 200) {
+                    let data = new Uint8Array(request.response);
+                    cv.FS_createDataFile('/', path, data, true, false, false);
+                    callback();
+                } else {
+                    self.printError('Failed to load ' + url + ' status: ' + request.status);
+                }
+            }
+        };
+        request.send();
+    };
+
+    this.loadImageToCanvas = function(url, canvasId) {
+        let canvas = document.getElementById(canvasId);
+        let ctx = canvas.getContext('2d');
+        let img = new Image();
+        img.crossOrigin = 'anonymous';
+        img.onload = function() {
+            canvas.width = img.width;
+            canvas.height = img.height;
+            ctx.drawImage(img, 0, 0, img.width, img.height);
+        };
+        img.src = url;
+    };
+
+    this.executeCode = function(textAreaId) {
+        try {
+            this.clearError();
+            let code = document.getElementById(textAreaId).value;
+            eval(code);
+        } catch (err) {
+            this.printError(err);
+        }
+    };
+
+    this.clearError = function() {
+        this.errorOutput.innerHTML = '';
+    };
+
+    this.printError = function(err) {
+        if (typeof err === 'undefined') {
+            err = '';
+        } else if (typeof err === 'number') {
+            if (!isNaN(err)) {
+                if (typeof cv !== 'undefined') {
+                    err = 'Exception: ' + cv.exceptionFromPtr(err).msg;
+                }
+            }
+        } else if (typeof err === 'string') {
+            let ptr = Number(err.split(' ')[0]);
+            if (!isNaN(ptr)) {
+                if (typeof cv !== 'undefined') {
+                    err = 'Exception: ' + cv.exceptionFromPtr(ptr).msg;
+                }
+            }
+        } else if (err instanceof Error) {
+            err = err.stack.replace(/\n/g, '<br>');
+        }
+        this.errorOutput.innerHTML = err;
+    };
+
+    this.loadCode = function(scriptId, textAreaId) {
+        let scriptNode = document.getElementById(scriptId);
+        let textArea = document.getElementById(textAreaId);
+        if (scriptNode.type !== 'text/code-snippet') {
+            throw Error('Unknown code snippet type');
+        }
+        textArea.value = scriptNode.text.replace(/^\n/, '');
+    };
+
+    this.addFileInputHandler = function(fileInputId, canvasId) {
+        let inputElement = document.getElementById(fileInputId);
+        inputElement.addEventListener('change', (e) => {
+            let files = e.target.files;
+            if (files.length > 0) {
+                let imgUrl = URL.createObjectURL(files[0]);
+                self.loadImageToCanvas(imgUrl, canvasId);
+            }
+        }, false);
+    };
+
+    function onVideoCanPlay() {
+        if (self.onCameraStartedCallback) {
+            self.onCameraStartedCallback(self.stream, self.video);
+        }
+    }
+
+    this.startCamera = function(resolution, callback, videoId) {
+        const constraints = {
+            'qvga': {width: {exact: 320}, height: {exact: 240}},
+            'vga': {width: {exact: 640}, height: {exact: 480}}};
+        let video = document.getElementById(videoId);
+        if (!video) {
+            video = document.createElement('video');
+        }
+
+        let videoConstraint = constraints[resolution];
+        if (!videoConstraint) {
+            videoConstraint = true;
+        }
+
+        navigator.mediaDevices.getUserMedia({video: videoConstraint, audio: false})
+            .then(function(stream) {
+                video.srcObject = stream;
+                video.play();
+                self.video = video;
+                self.stream = stream;
+                self.onCameraStartedCallback = callback;
+                video.addEventListener('canplay', onVideoCanPlay, false);
+            })
+            .catch(function(err) {
+                self.printError('Camera Error: ' + err.name + ' ' + err.message);
+            });
+    };
+
+    this.stopCamera = function() {
+        if (this.video) {
+            this.video.pause();
+            this.video.srcObject = null;
+            this.video.removeEventListener('canplay', onVideoCanPlay);
+        }
+        if (this.stream) {
+            this.stream.getVideoTracks()[0].stop();
+        }
+    };
+}
diff --git a/doc/js_tutorials/js_core/js_basic_ops/js_basic_ops.markdown b/doc/js_tutorials/js_core/js_basic_ops/js_basic_ops.markdown
new file mode 100644 (file)
index 0000000..79ca086
--- /dev/null
@@ -0,0 +1,266 @@
+Basic Operations on Images {#tutorial_js_basic_ops}
+==========================
+
+Goal
+----
+
+-   Learn how to access image properties
+-   Learn how to construct Mat
+-   Learn how to copy Mat
+-   Learn how to convert the type of Mat
+-   Learn how to use MatVector
+-   Learn how to access pixel values and modify them
+-   Learn how to set Region of Interest (ROI)
+-   Learn how to split and merge images
+
+Accessing Image Properties
+--------------------------
+
+Image properties include the number of rows and columns, the size, the depth, the number of channels, and the type of the image data.
+
+@code{.js}
+let src = cv.imread("canvasInput");
+console.log('image width: ' + src.cols + '\n' +
+            'image height: ' + src.rows + '\n' +
+            'image size: ' + src.size().width + '*' + src.size().height + '\n' +
+            'image depth: ' + src.depth() + '\n' +
+            'image channels ' + src.channels() + '\n' +
+            'image type: ' + src.type() + '\n');
+@endcode
+
+@note src.type() is very important while debugging because a large number of errors in OpenCV.js
+code are caused by an invalid data type.
+
+How to construct Mat
+--------------------
+
+There are 4 basic constructors:
+
+@code{.js}
+// 1. default constructor
+let mat = new cv.Mat();
+// 2. two-dimensional arrays by size and type
+let mat = new cv.Mat(size, type);
+// 3. two-dimensional arrays by rows, cols, and type
+let mat = new cv.Mat(rows, cols, type);
+// 4. two-dimensional arrays by rows, cols, and type with initialization value
+let mat = new cv.Mat(rows, cols, type, new cv.Scalar());
+@endcode
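+
+As a concrete illustration (the dimensions and fill value below are arbitrary, chosen just for this example):
+
+@code{.js}
+// a 4-row, 3-column matrix of 8-bit RGBA pixels, initialized to opaque red
+let mat = new cv.Mat(4, 3, cv.CV_8UC4, new cv.Scalar(255, 0, 0, 255));
+console.log('rows: ' + mat.rows + ', cols: ' + mat.cols + ', type: ' + mat.type());
+mat.delete();
+@endcode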
+
+There are 3 static functions:
+
+@code{.js}
+// 1. Create a Mat which is full of zeros
+let mat = cv.Mat.zeros(rows, cols, type);
+// 2. Create a Mat which is full of ones
+let mat = cv.Mat.ones(rows, cols, type);
+// 3. Create a Mat which is an identity matrix
+let mat = cv.Mat.eye(rows, cols, type);
+@endcode
+
+There are 2 factory functions:
+@code{.js}
+// 1. Use JS array to construct a mat.
+// For example: let mat = cv.matFromArray(2, 2, cv.CV_8UC1, [1, 2, 3, 4]);
+let mat = cv.matFromArray(rows, cols, type, array);
+// 2. Use imgData to construct a mat
+let ctx = canvas.getContext("2d");
+let imgData = ctx.getImageData(0, 0, canvas.width, canvas.height);
+let mat = cv.matFromImageData(imgData);
+@endcode
+
+@note Don't forget to delete cv.Mat when you don't want to use it any more.
+
+How to copy Mat
+---------------
+
+There are 2 ways to copy a Mat:
+
+@code{.js}
+// 1. Clone
+let dst = src.clone();
+// 2. copyTo (only entries indicated in the mask are copied)
+src.copyTo(dst, mask);
+@endcode
+
+How to convert the type of Mat
+------------------------------
+
+We use the function: **convertTo(m, rtype, alpha = 1, beta = 0)**
+@param m        output matrix; if it does not have a proper size or type before the operation, it is reallocated.
+@param rtype    desired output matrix type or, rather, the depth, since the number of channels is the same as in the input; if rtype is negative, the output matrix will have the same type as the input.
+@param alpha    optional scale factor.
+@param beta     optional delta added to the scaled values.
+
+@code{.js}
+src.convertTo(dst, rtype);
+@endcode
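+
+As a small sketch of the optional parameters (the canvas name and scale factor are just an example):
+converting an 8-bit image to a float image in the [0, 1] range via alpha:
+
+@code{.js}
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// alpha = 1/255 rescales 0..255 to 0..1; beta = 0 adds no offset
+src.convertTo(dst, cv.CV_32F, 1 / 255, 0);
+src.delete(); dst.delete();
+@endcode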
+
+How to use MatVector
+--------------------
+
+@code{.js}
+let mat = new cv.Mat();
+// Initialise a MatVector
+let matVec = new cv.MatVector();
+// Push a Mat back into MatVector
+matVec.push_back(mat);
+// Get a Mat from MatVector
+let cnt = matVec.get(0);
+mat.delete(); matVec.delete(); cnt.delete();
+@endcode
+
+@note Don't forget to delete cv.Mat, cv.MatVector and cnt (the Mat you get from MatVector) when you don't want to use them any more.
+
+Accessing and Modifying pixel values
+------------------------------------
+
+Firstly, you should know the following type relationship:
+
+Data Properties  | C++ Type | JavaScript Typed Array | Mat Type
+---------------  | -------- | ---------------------- | --------
+data             | uchar    | Uint8Array             | CV_8U
+data8S           | char     | Int8Array              | CV_8S
+data16U          | ushort   | Uint16Array            | CV_16U
+data16S          | short    | Int16Array             | CV_16S
+data32S          | int      | Int32Array             | CV_32S
+data32F          | float    | Float32Array           | CV_32F
+data64F          | double   | Float64Array           | CV_64F
+
+**1. data**
+
+@code{.js}
+let row = 3, col = 4;
+let src = cv.imread("canvasInput");
+if (src.isContinuous()) {
+    let R = src.data[row * src.cols * src.channels() + col * src.channels()];
+    let G = src.data[row * src.cols * src.channels() + col * src.channels() + 1];
+    let B = src.data[row * src.cols * src.channels() + col * src.channels() + 2];
+    let A = src.data[row * src.cols * src.channels() + col * src.channels() + 3];
+}
+@endcode
+
+@note  Data manipulation is only valid for continuous Mat. You should use isContinuous() to check first.
+
+**2. at**
+
+Mat Type  | At Manipulation
+--------- | ---------------
+CV_8U     | ucharAt
+CV_8S     | charAt
+CV_16U    | ushortAt
+CV_16S    | shortAt
+CV_32S    | intAt
+CV_32F    | floatAt
+CV_64F    | doubleAt
+
+@code{.js}
+let row = 3, col = 4;
+let src = cv.imread("canvasInput");
+let R = src.ucharAt(row, col * src.channels());
+let G = src.ucharAt(row, col * src.channels() + 1);
+let B = src.ucharAt(row, col * src.channels() + 2);
+let A = src.ucharAt(row, col * src.channels() + 3);
+@endcode
+
+@note  At manipulation is only for single-channel access and the value can't be modified.
+
+**3. ptr**
+
+Mat Type  | Ptr Manipulation | JavaScript Typed Array
+--------  | ---------------  | ----------------------
+CV_8U     | ucharPtr         | Uint8Array
+CV_8S     | charPtr          | Int8Array
+CV_16U    | ushortPtr        | Uint16Array
+CV_16S    | shortPtr         | Int16Array
+CV_32S    | intPtr           | Int32Array
+CV_32F    | floatPtr         | Float32Array
+CV_64F    | doublePtr        | Float64Array
+
+@code{.js}
+let row = 3, col = 4;
+let src = cv.imread("canvasInput");
+let pixel = src.ucharPtr(row, col);
+let R = pixel[0];
+let G = pixel[1];
+let B = pixel[2];
+let A = pixel[3];
+@endcode
+
+mat.ucharPtr(k) gets the k-th row of the mat. mat.ucharPtr(i, j) gets the element in the i-th row and the j-th column of the mat.
+
+Image ROI
+---------
+
+Sometimes, you will have to play with a certain region of an image. For eye detection in images, face
+detection is done first over the whole image, and when a face is found, we select the face region alone
+and search for eyes inside it instead of searching the whole image. This improves accuracy (because eyes
+are always on faces) and performance (because we search in a small area).
+
+We use the function: **roi (rect)**
+@param rect    rectangle Region of Interest.
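+
+Below is a minimal sketch of how roi can be used; the canvas ids and the
+rectangle coordinates are illustration-only assumptions.
+@code{.js}
+let src = cv.imread("canvasInput");
+// Take a 100x100 region whose top left corner is at (50, 50).
+let rect = new cv.Rect(50, 50, 100, 100);
+let dst = src.roi(rect);
+cv.imshow("canvasOutput", dst);
+src.delete(); dst.delete();
+@endcode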
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_basic_ops_roi.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+
+Splitting and Merging Image Channels
+------------------------------------
+
+Sometimes you will need to work separately on R,G,B channels of image. Then you need to split the
+RGB images to single planes. Or another time, you may need to join these individual channels to RGB
+image.
+
+@code{.js}
+let src = cv.imread("canvasInput");
+let rgbaPlanes = new cv.MatVector();
+// Split the Mat
+cv.split(src, rgbaPlanes);
+// Get R channel
+let R = rgbaPlanes.get(0);
+// Merge all channels
+cv.merge(rgbaPlanes, src);
+src.delete(); rgbaPlanes.delete(); R.delete();
+@endcode
+
+@note Don't forget to delete cv.Mat, cv.MatVector and R(the Mat you get from MatVector) when you don't want to use them any more.
+
+Making Borders for Images (Padding)
+-----------------------------------
+
+If you want to create a border around an image, something like a photo frame, you can use the
+**cv.copyMakeBorder()** function. It also has applications in convolution operations, zero
+padding etc. This function takes the following arguments:
+
+-   **src** - input image
+-   **top**, **bottom**, **left**, **right** - border width in number of pixels in corresponding
+    directions
+
+-   **borderType** - Flag defining what kind of border to be added. It can be one of the following types:
+    -   **cv.BORDER_CONSTANT** - Adds a constant colored border. The value should be given
+        as the next argument.
+    -   **cv.BORDER_REFLECT** - Border will be a mirror reflection of the border elements,
+        like this: *fedcba|abcdefgh|hgfedcb*
+    -   **cv.BORDER_REFLECT_101** or **cv.BORDER_DEFAULT** - Same as above, but with a
+        slight change, like this: *gfedcb|abcdefgh|gfedcba*
+    -   **cv.BORDER_REPLICATE** - The last element is replicated throughout, like this:
+        *aaaaaa|abcdefgh|hhhhhhh*
+    -   **cv.BORDER_WRAP** - Wraps around to the opposite edge, like this:
+        *cdefgh|abcdefgh|abcdefg*
+
+-   **value** - Color of the border if borderType is cv.BORDER_CONSTANT.
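+
+Below is a minimal sketch, assuming a canvas with id "canvasInput" exists; the
+border width and color are illustration-only values.
+@code{.js}
+let src = cv.imread("canvasInput");
+let dst = new cv.Mat();
+// Color used because borderType is cv.BORDER_CONSTANT.
+let color = new cv.Scalar(255, 0, 0, 255);
+cv.copyMakeBorder(src, dst, 10, 10, 10, 10, cv.BORDER_CONSTANT, color);
+cv.imshow("canvasOutput", dst);
+src.delete(); dst.delete();
+@endcode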
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_basic_ops_copymakeborder.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown b/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown
new file mode 100644 (file)
index 0000000..73e4839
--- /dev/null
@@ -0,0 +1,62 @@
+Arithmetic Operations on Images {#tutorial_js_image_arithmetics}
+===============================
+
+Goal
+----
+
+-   Learn several arithmetic operations on images like addition, subtraction, bitwise operations
+    etc.
+-   You will learn these functions : **cv.add()**, **cv.subtract()**  etc.
+
+Image Addition
+--------------
+
+You can add two images with the OpenCV function cv.add(): res = img1 + img2. Both images should be of the same depth and type.
+
+For example, consider the sample below:
+@code{.js}
+let src1 = cv.imread("canvasInput1");
+let src2 = cv.imread("canvasInput2");
+let dst = new cv.Mat();
+let mask = new cv.Mat();
+let dtype = -1;
+cv.add(src1, src2, dst, mask, dtype);
+src1.delete(); src2.delete(); dst.delete(); mask.delete();
+@endcode
+
+Image Subtraction
+--------------
+
+You can subtract two images with the OpenCV function cv.subtract(): res = img1 - img2. Both images should be of the same depth and type.
+
+For example, consider the sample below:
+@code{.js}
+let src1 = cv.imread("canvasInput1");
+let src2 = cv.imread("canvasInput2");
+let dst = new cv.Mat();
+let mask = new cv.Mat();
+let dtype = -1;
+cv.subtract(src1, src2, dst, mask, dtype);
+src1.delete(); src2.delete(); dst.delete(); mask.delete();
+@endcode
+
+Bitwise Operations
+------------------
+
+This includes bitwise AND, OR, NOT and XOR operations. They are highly useful while extracting
+any part of an image, and when defining and working with non-rectangular
+ROIs. Below we will see an example of how to change a particular region of an image.
+
+I want to put the OpenCV logo above an image. If I add two images, the colors will change. If I blend
+them, I get a transparent effect. But I want it to be opaque. If it were a rectangular region, I could
+use an ROI as we did in the last chapter. But the OpenCV logo is not a rectangular shape, so we do it
+with bitwise operations.
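+
+Below is a minimal sketch of the idea, assuming canvases "canvasInput1" (the
+image) and "canvasInput2" (the logo) exist; the threshold value and the ROI
+position are illustration-only assumptions.
+@code{.js}
+let img = cv.imread("canvasInput1");
+let logo = cv.imread("canvasInput2");
+// Build a mask of the logo and its inverse mask.
+let gray = new cv.Mat(), mask = new cv.Mat(), maskInv = new cv.Mat();
+cv.cvtColor(logo, gray, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(gray, mask, 100, 255, cv.THRESH_BINARY);
+cv.bitwise_not(mask, maskInv);
+// Black out the logo area in the top-left ROI, keep only the logo pixels, then add.
+let roi = img.roi(new cv.Rect(0, 0, logo.cols, logo.rows));
+let imgBg = new cv.Mat(), logoFg = new cv.Mat(), sum = new cv.Mat();
+let none = new cv.Mat(); // empty mask
+cv.bitwise_and(roi, roi, imgBg, maskInv);
+cv.bitwise_and(logo, logo, logoFg, mask);
+cv.add(imgBg, logoFg, sum, none, -1);
+sum.copyTo(roi);
+cv.imshow("canvasOutput", img);
+img.delete(); logo.delete(); gray.delete(); mask.delete(); maskInv.delete();
+roi.delete(); imgBg.delete(); logoFg.delete(); sum.delete(); none.delete();
+@endcode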
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_image_arithmetics_bitwise.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_core/js_some_data_structures/js_image_arithmetics.markdown b/doc/js_tutorials/js_core/js_some_data_structures/js_image_arithmetics.markdown
new file mode 100644 (file)
index 0000000..990819e
--- /dev/null
@@ -0,0 +1,120 @@
+Some Data Structures {#tutorial_js_some_data_structures}
+===============================
+
+Goal
+----
+
+-   You will learn some data structures : **Point**, **Scalar**, **Size**,  **Circle**, **Rect**, **RotatedRect**  etc.
+
+Scalar is an array type in JavaScript. Point, Size, Circle, Rect and RotatedRect are object types in JavaScript.
+
+Point
+--------------
+
+There are 2 ways to construct a Point and they are equivalent:
+@code{.js}
+// The first way
+let point1 = new cv.Point(x, y);
+// The second way
+let point2 = {x: x, y: y};
+@endcode
+
+@param x      x coordinate of the point (the origin is the top left corner of the image).
+@param y      y coordinate of the point.
+
+Scalar
+--------------
+
+There are 2 ways to construct a Scalar and they are equivalent:
+@code{.js}
+// The first way
+let scalar1 = new cv.Scalar(R, G, B, Alpha);
+// The second way
+let scalar2 = [R, G, B, Alpha];
+@endcode
+
+@param R     pixel value of red channel.
+@param G     pixel value of green channel.
+@param B     pixel value of blue channel.
+@param Alpha pixel value of alpha channel.
+
+Size
+------------------
+
+There are 2 ways to construct a Size and they are equivalent:
+@code{.js}
+// The first way
+let size1 = new cv.Size(width, height);
+// The second way
+let size2 = {width : width, height : height};
+@endcode
+
+@param width    the width of the size.
+@param height   the height of the size.
+
+Circle
+------------------
+
+There are 2 ways to construct a Circle and they are equivalent:
+@code{.js}
+// The first way
+let circle1 = new cv.Circle(center, radius);
+// The second way
+let circle2 = {center : center, radius : radius};
+@endcode
+
+@param center    the center of the circle.
+@param radius    the radius of the circle.
+
+Rect
+------------------
+
+There are 2 ways to construct a Rect and they are equivalent:
+@code{.js}
+// The first way
+let rect1 = new cv.Rect(x, y, width, height);
+// The second way
+let rect2 = {x : x, y : y, width : width, height : height};
+@endcode
+
+@param x        x coordinate of the vertex which is the top left corner of the rectangle.
+@param y        y coordinate of the vertex which is the top left corner of the rectangle.
+@param width    the width of the rectangle.
+@param height   the height of the rectangle.
+
+RotatedRect
+------------------
+
+There are 2 ways to construct a RotatedRect and they are equivalent:
+@code{.js}
+// The first way
+let rotatedRect1 = new cv.RotatedRect(center, size, angle);
+// The second way
+let rotatedRect2 = {center : center, size : size, angle : angle};
+@endcode
+
+@param center  the rectangle mass center.
+@param size    width and height of the rectangle.
+@param angle   the rotation angle in a clockwise direction. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle.
+
+Learn how to get the vertices from rotatedRect:
+
+We use the function: **cv.RotatedRect.points(rotatedRect)**
+@param rotatedRect       rotated rectangle
+
+@code{.js}
+let vertices = cv.RotatedRect.points(rotatedRect);
+let point1 = vertices[0];
+let point2 = vertices[1];
+let point3 = vertices[2];
+let point4 = vertices[3];
+@endcode
+
+Learn how to get the bounding rectangle from rotatedRect:
+
+We use the function: **cv.RotatedRect.boundingRect(rotatedRect)**
+@param rotatedRect       rotated rectangle
+
+@code{.js}
+let boundingRect = cv.RotatedRect.boundingRect(rotatedRect);
+@endcode
\ No newline at end of file
diff --git a/doc/js_tutorials/js_core/js_table_of_contents_core.markdown b/doc/js_tutorials/js_core/js_table_of_contents_core.markdown
new file mode 100644 (file)
index 0000000..afb68ed
--- /dev/null
@@ -0,0 +1,16 @@
+Core Operations {#tutorial_js_table_of_contents_core}
+===============
+
+-   @subpage tutorial_js_basic_ops
+
+    Learn to read and
+    edit pixel values, work with image ROIs and other basic operations.
+
+-   @subpage tutorial_js_image_arithmetics
+
+    Perform arithmetic
+    operations on images
+
+-   @subpage tutorial_js_some_data_structures
+
+    Learn some data structures
diff --git a/doc/js_tutorials/js_gui/js_image_display/images/Imread_Imshow_Tutorial_Result.png b/doc/js_tutorials/js_gui/js_image_display/images/Imread_Imshow_Tutorial_Result.png
new file mode 100644 (file)
index 0000000..4de5c18
Binary files /dev/null and b/doc/js_tutorials/js_gui/js_image_display/images/Imread_Imshow_Tutorial_Result.png differ
diff --git a/doc/js_tutorials/js_gui/js_image_display/js_image_display.markdown b/doc/js_tutorials/js_gui/js_image_display/js_image_display.markdown
new file mode 100644 (file)
index 0000000..efe65e3
--- /dev/null
@@ -0,0 +1,95 @@
+Getting Started with Images {#tutorial_js_image_display}
+===========================
+
+Goals
+-----
+
+-   Learn how to read an image and how to display it in a web page.
+
+Read an image
+-------------
+
+OpenCV.js stores images as the cv.Mat type. We use the HTML canvas element to transfer a cv.Mat to the
+web page and back. The ImageData interface can represent or set the underlying pixel data of an area of a
+canvas element.
+
+@sa Please refer to canvas docs for more details.
+
+First, create an ImageData object from the canvas:
+@code{.js}
+let canvas = document.getElementById(canvasInputId);
+let ctx = canvas.getContext('2d');
+let imgData = ctx.getImageData(0, 0, canvas.width, canvas.height);
+@endcode
+
+Then, use cv.matFromImageData to construct a cv.Mat:
+@code{.js}
+let src = cv.matFromImageData(imgData);
+@endcode
+
+@note Because the canvas only supports 8-bit RGBA images with continuous storage, the cv.Mat type is cv.CV_8UC4.
+This is different from native OpenCV, because images returned and shown by the native **imread** and
+**imshow** have the channels stored in BGR order.
+
+Display an image
+----------------
+
+First, convert the type of src to cv.CV_8UC4:
+@code{.js}
+let dst = new cv.Mat();
+// scale and shift are used to map the data to [0, 255].
+src.convertTo(dst, cv.CV_8U, scale, shift);
+// *** is GRAY, RGB, or RGBA, depending on whether src.channels() is 1, 3 or 4.
+cv.cvtColor(dst, dst, cv.COLOR_***2RGBA);
+@endcode
+
+Then, create an ImageData object from dst:
+@code{.js}
+let imgData = new ImageData(new Uint8ClampedArray(dst.data), dst.cols, dst.rows);
+@endcode
+
+Finally, display it:
+@code{.js}
+let canvas = document.getElementById(canvasOutputId);
+let ctx = canvas.getContext('2d');
+ctx.clearRect(0, 0, canvas.width, canvas.height);
+canvas.width = imgData.width;
+canvas.height = imgData.height;
+ctx.putImageData(imgData, 0, 0);
+@endcode
+
+In OpenCV.js
+------------
+
+OpenCV.js implements image reading and showing using the above method.
+
+We use **cv.imread (imageSource)** to read an image from an HTML canvas or img element.
+@param imageSource   canvas element or id, or img element or id.
+@return              mat with channels stored in RGBA order.
+
+We use **cv.imshow (canvasSource, mat)** to display it. The function may scale the mat,
+depending on its depth:
+- If the mat is 8-bit unsigned, it is displayed as is.
+- If the mat is 16-bit unsigned or 32-bit integer, the pixels are divided by 256. That
+is, the value range [0,255*256] is mapped to [0,255].
+- If the mat is 32-bit floating-point, the pixel values are multiplied by 255. That is,
+the value range [0,1] is mapped to [0,255].
+
+@param canvasSource  canvas element or id.
+@param mat           mat to be shown.
+
+The image reading and showing code above can be simplified as below.
+@code{.js}
+let img = cv.imread(imageSource);
+cv.imshow(canvasOutput, img);
+img.delete();
+@endcode
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_image_display.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
diff --git a/doc/js_tutorials/js_gui/js_table_of_contents_gui.markdown b/doc/js_tutorials/js_gui/js_table_of_contents_gui.markdown
new file mode 100644 (file)
index 0000000..3a968bf
--- /dev/null
@@ -0,0 +1,14 @@
+GUI Features {#tutorial_js_table_of_contents_gui}
+============
+
+-   @subpage tutorial_js_image_display
+
+    Learn to load an image and display it in a web page
+
+-   @subpage tutorial_js_video_display
+
+    Learn to capture video from a camera and play it
+
+-   @subpage tutorial_js_trackbar
+
+    Create a trackbar to control certain parameters
diff --git a/doc/js_tutorials/js_gui/js_trackbar/images/Trackbar_Tutorial_Range.png b/doc/js_tutorials/js_gui/js_trackbar/images/Trackbar_Tutorial_Range.png
new file mode 100644 (file)
index 0000000..d897cba
Binary files /dev/null and b/doc/js_tutorials/js_gui/js_trackbar/images/Trackbar_Tutorial_Range.png differ
diff --git a/doc/js_tutorials/js_gui/js_trackbar/images/Trackbar_Tutorial_Result.png b/doc/js_tutorials/js_gui/js_trackbar/images/Trackbar_Tutorial_Result.png
new file mode 100644 (file)
index 0000000..8f3d411
Binary files /dev/null and b/doc/js_tutorials/js_gui/js_trackbar/images/Trackbar_Tutorial_Result.png differ
diff --git a/doc/js_tutorials/js_gui/js_trackbar/js_trackbar.markdown b/doc/js_tutorials/js_gui/js_trackbar/js_trackbar.markdown
new file mode 100644 (file)
index 0000000..ed1c021
--- /dev/null
@@ -0,0 +1,73 @@
+Add a Trackbar to Your Application {#tutorial_js_trackbar}
+==================================
+
+Goal
+----
+
+-   Use HTML DOM Input Range Object to add a trackbar to your application.
+
+Code Demo
+---------
+
+Here, we will create a simple application that blends two images. We will let the user enter the
+weight by using the trackbar.
+
+First, we need to create three canvas elements: two for input and one for output. Please refer to
+the tutorial @ref tutorial_js_image_display.
+@code{.js}
+let src1 = cv.imread('canvasInput1');
+let src2 = cv.imread('canvasInput2');
+@endcode
+
+Then, we use the HTML DOM Input Range Object to implement the trackbar, as shown below.
+![](images/Trackbar_Tutorial_Range.png)
+
+@note &lt;input&gt; elements with type="range" are not supported in Internet Explorer 9 and earlier versions.
+
+You can create an &lt;input&gt; element with type="range" with the document.createElement() method:
+@code{.js}
+let x = document.createElement('INPUT');
+x.setAttribute('type', 'range');
+@endcode
+
+You can access an &lt;input&gt; element with type="range" with getElementById():
+@code{.js}
+let x = document.getElementById('myRange');
+@endcode
+
+As a trackbar, the range element needs a trackbar name, a default value, a minimum value, a maximum value,
+a step and a callback function which is executed every time the trackbar value changes. Inside the callback,
+the current trackbar position can be read from the element's value property. Additionally, a text element
+to display the trackbar value is helpful. In our case, we can create the trackbar as below:
+@code{.html}
+Weight: <input type="range" id="trackbar" value="50" min="0" max="100" step="1" oninput="callback()">
+<input type="text" id="weightValue" size="3" value="50"/>
+@endcode
+
+Finally, we can use the trackbar value in the callback function, blend the two images, and display the result.
+@code{.js}
+let weightValue = document.getElementById('weightValue');
+let trackbar = document.getElementById('trackbar');
+weightValue.setAttribute('value', trackbar.value);
+let alpha = trackbar.value/trackbar.max;
+let beta = ( 1.0 - alpha );
+let src1 = cv.imread('canvasInput1');
+let src2 = cv.imread('canvasInput2');
+let dst = new cv.Mat();
+cv.addWeighted( src1, alpha, src2, beta, 0.0, dst, -1);
+cv.imshow('canvasOutput', dst);
+dst.delete();
+src1.delete();
+src2.delete();
+@endcode
+
+@sa cv.addWeighted
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_trackbar.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
diff --git a/doc/js_tutorials/js_gui/js_video_display/js_video_display.markdown b/doc/js_tutorials/js_gui/js_video_display/js_video_display.markdown
new file mode 100644 (file)
index 0000000..613c593
--- /dev/null
@@ -0,0 +1,105 @@
+Getting Started with Videos {#tutorial_js_video_display}
+===========================
+
+Goal
+----
+
+-   Learn to capture video from a camera and display it.
+
+Capture video from camera
+-------------------------
+
+Often, we have to capture a live stream with a camera. In OpenCV.js, we use [WebRTC](https://webrtc.org/)
+and the HTML canvas element to implement this. Let's capture a video from the camera (built-in
+or USB), convert it into a grayscale video and display it.
+
+To capture a video, you need to add some HTML elements to the web page:
+- a &lt;video&gt; to display video from camera directly
+- a &lt;canvas&gt; to transfer video to canvas ImageData frame-by-frame
+- another &lt;canvas&gt; to display the video OpenCV.js gets
+
+First, we use WebRTC navigator.mediaDevices.getUserMedia to get the media stream.
+@code{.js}
+let video = document.getElementById("videoInput"); // video is the id of video tag
+navigator.mediaDevices.getUserMedia({ video: true, audio: false })
+    .then(function(stream) {
+        video.srcObject = stream;
+        video.play();
+    })
+    .catch(function(err) {
+        console.log("An error occurred! " + err);
+    });
+@endcode
+
+@note This function is unnecessary when you capture video from a video file. But notice that the
+HTML video element only supports the Ogg (Theora), WebM (VP8/VP9) and MP4 (H.264) video formats.
+
+Playing video
+-------------
+Now the browser gets the camera stream. Then, we use the CanvasRenderingContext2D.drawImage() method
+of the Canvas 2D API to draw the video onto the canvas. Finally, we can use the method in @ref tutorial_js_image_display
+to read and display the image in the canvas. To play the video, cv.imshow() should be executed every delay
+milliseconds. We recommend the setTimeout() method: if the video is 30fps, the delay
+should be (1000/30 - processing_time) milliseconds.
+@code{.js}
+let canvasFrame = document.getElementById("canvasFrame"); // canvasFrame is the id of <canvas>
+let context = canvasFrame.getContext("2d");
+let src = new cv.Mat(height, width, cv.CV_8UC4);
+let dst = new cv.Mat(height, width, cv.CV_8UC1);
+
+const FPS = 30;
+function processVideo() {
+    let begin = Date.now();
+    context.drawImage(video, 0, 0, width, height);
+    src.data.set(context.getImageData(0, 0, width, height).data);
+    cv.cvtColor(src, dst, cv.COLOR_RGBA2GRAY);
+    cv.imshow("canvasOutput", dst); // canvasOutput is the id of another <canvas>;
+    // schedule next one.
+    let delay = 1000/FPS - (Date.now() - begin);
+    setTimeout(processVideo, delay);
+}
+
+// schedule first one.
+setTimeout(processVideo, 0);
+@endcode
+
+OpenCV.js implements **cv.VideoCapture (videoSource)** using the above method. You do not need to
+add the hidden canvas element manually.
+@param videoSource   the video id or element.
+@return              cv.VideoCapture instance
+
+We use **read (image)** to get one frame of the video. For performance reasons, the image should be
+constructed with the cv.CV_8UC4 type and the same size as the video.
+@param image         image with the cv.CV_8UC4 type and the same size as the video.
+
+The video playing code above can be simplified as below.
+@code{.js}
+let src = new cv.Mat(height, width, cv.CV_8UC4);
+let dst = new cv.Mat(height, width, cv.CV_8UC1);
+let cap = new cv.VideoCapture(videoSource);
+
+const FPS = 30;
+function processVideo() {
+    let begin = Date.now();
+    cap.read(src);
+    cv.cvtColor(src, dst, cv.COLOR_RGBA2GRAY);
+    cv.imshow("canvasOutput", dst);
+    // schedule next one.
+    let delay = 1000/FPS - (Date.now() - begin);
+    setTimeout(processVideo, delay);
+}
+
+// schedule first one.
+setTimeout(processVideo, 0);
+@endcode
+
+@note Remember to delete src and dst when you stop.
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_video_display.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_canny/js_canny.markdown b/doc/js_tutorials/js_imgproc/js_canny/js_canny.markdown
new file mode 100644 (file)
index 0000000..3f52d90
--- /dev/null
@@ -0,0 +1,88 @@
+Canny Edge Detection {#tutorial_js_canny}
+====================
+
+Goal
+----
+
+-   Concept of Canny edge detection
+-   OpenCV functions for that : **cv.Canny()**
+
+Theory
+------
+
+Canny Edge Detection is a popular edge detection algorithm. It was developed by John F. Canny in 1986. It is a multi-stage algorithm and we will go through each stage.
+
+-#  **Noise Reduction**
+
+    Since edge detection is susceptible to noise in the image, the first step is to remove the noise
+    with a 5x5 Gaussian filter. We have already seen this in previous chapters.
+
+-#  **Finding Intensity Gradient of the Image**
+
+    The smoothed image is then filtered with a Sobel kernel in both the horizontal and vertical directions to
+    get the first derivatives in the horizontal direction (\f$G_x\f$) and the vertical direction (\f$G_y\f$). From these two
+    images, we can find the edge gradient and direction for each pixel as follows:
+
+    \f[
+    Edge\_Gradient \; (G) = \sqrt{G_x^2 + G_y^2} \\
+    Angle \; (\theta) = \tan^{-1} \bigg(\frac{G_y}{G_x}\bigg)
+    \f]
+
+    Gradient direction is always perpendicular to edges. It is rounded to one of four angles
+    representing vertical, horizontal and two diagonal directions.
+
+-#  **Non-maximum Suppression**
+
+    After getting the gradient magnitude and direction, a full scan of the image is done to remove any unwanted
+    pixels which may not constitute an edge. For this, every pixel is checked to see if it is a
+    local maximum in its neighborhood in the direction of the gradient. Check the image below:
+
+    ![image](images/nms.jpg)
+
+    Point A is on the edge (in the vertical direction). The gradient direction is normal to the edge. Points B
+    and C are in the gradient direction. So point A is checked against points B and C to see if it forms a
+    local maximum. If so, it is considered for the next stage; otherwise, it is suppressed (put to zero).
+
+    In short, the result you get is a binary image with "thin edges".
+
+-#  **Hysteresis Thresholding**
+
+    This stage decides which of the edges are really edges and which are not. For this, we need two
+    threshold values, minVal and maxVal. Any edges with an intensity gradient above maxVal are sure to
+    be edges, and those below minVal are sure to be non-edges, so they are discarded. Those which lie between these
+    two thresholds are classified as edges or non-edges based on their connectivity. If they are connected
+    to "sure-edge" pixels, they are considered to be part of edges. Otherwise, they are also discarded.
+    See the image below:
+
+    ![image](images/hysteresis.jpg)
+
+    Edge A is above maxVal, so it is considered a "sure-edge". Although edge C is below maxVal, it is
+    connected to edge A, so it is also considered a valid edge and we get the full curve. But edge B,
+    although it is above minVal and in the same region as edge C, is not connected to any
+    "sure-edge", so it is discarded. It is therefore very important to select minVal and maxVal
+    accordingly to get the correct result.
+
+    This stage also removes small pixel noise on the assumption that edges are long lines.
+
+So what we finally get are the strong edges in the image.
+
+Canny Edge Detection in OpenCV
+------------------------------
+
+We use the function: **cv.Canny(image, edges, threshold1, threshold2, apertureSize = 3, L2gradient = false)**
+@param image         8-bit input image.
+@param edges         output edge map; single-channel 8-bit image, which has the same size as image.
+@param threshold1    first threshold for the hysteresis procedure.
+@param threshold2    second threshold for the hysteresis procedure.
+@param apertureSize  aperture size for the Sobel operator.
+@param L2gradient    specifies the equation for finding the gradient
+magnitude. If it is true, it uses the equation mentioned above, which is more accurate; otherwise it uses: \f$Edge\_Gradient \; (G) = |G_x| + |G_y|\f$.
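+
+Below is a minimal sketch, assuming a canvas with id "canvasInput" exists; the
+threshold values are illustration-only choices.
+@code{.js}
+let src = cv.imread("canvasInput");
+let dst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.Canny(src, dst, 50, 100, 3, false);
+cv.imshow("canvasOutput", dst);
+src.delete(); dst.delete();
+@endcode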
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_canny.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_colorspaces/js_colorspaces.markdown b/doc/js_tutorials/js_imgproc/js_colorspaces/js_colorspaces.markdown
new file mode 100644 (file)
index 0000000..9dbfe96
--- /dev/null
@@ -0,0 +1,52 @@
+Changing Colorspaces {#tutorial_js_colorspaces}
+====================
+
+Goal
+----
+
+-   In this tutorial, you will learn how to convert images from one color-space to another, like
+    RGB \f$\leftrightarrow\f$ Gray, RGB \f$\leftrightarrow\f$ HSV etc.
+-   You will learn following functions : **cv.cvtColor()**, **cv.inRange()** etc.
+
+cvtColor
+--------------------
+
+There are more than 150 color-space conversion methods available in OpenCV. But we will look into
+the most widely used one: RGB \f$\leftrightarrow\f$ Gray.
+
+We use the function: **cv.cvtColor (src, dst, code, dstCn = 0)**
+@param src    input image.
+@param dst    output image of the same size and depth as src.
+@param code   color space conversion code (see **cv.ColorConversionCodes**).
+@param dstCn  number of channels in the destination image; if the parameter is 0, the number of the channels is derived automatically from src and code.
+
+For RGB \f$\rightarrow\f$ Gray conversion we use the code cv.COLOR_RGBA2GRAY.
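+
+Below is a minimal sketch, assuming a canvas with id "canvasInput" exists.
+@code{.js}
+let src = cv.imread("canvasInput");
+let dst = new cv.Mat();
+cv.cvtColor(src, dst, cv.COLOR_RGBA2GRAY, 0);
+cv.imshow("canvasOutput", dst);
+src.delete(); dst.delete();
+@endcode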
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_colorspaces_cvtColor.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+inRange
+---------------
+
+Checks if array elements lie between the elements of two other arrays.
+
+We use the function: **cv.inRange (src, lowerb, upperb, dst)**
+@param src     first input image.
+@param lowerb  inclusive lower boundary Mat of the same size as src.
+@param upperb  inclusive upper boundary Mat of the same size as src.
+@param dst     output image of the same size as src and cv.CV_8U type.
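+
+Below is a minimal sketch, assuming a canvas with id "canvasInput" exists; the
+boundary values are illustration-only choices.
+@code{.js}
+let src = cv.imread("canvasInput");
+let dst = new cv.Mat();
+let low = new cv.Mat(src.rows, src.cols, src.type(), [0, 0, 0, 0]);
+let high = new cv.Mat(src.rows, src.cols, src.type(), [150, 150, 150, 255]);
+// Pixels inside [low, high] become 255 in dst; all others become 0.
+cv.inRange(src, low, high, dst);
+cv.imshow("canvasOutput", dst);
+src.delete(); dst.delete(); low.delete(); high.delete();
+@endcode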
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_colorspaces_inRange.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contour_features/js_contour_features.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contour_features/js_contour_features.markdown
new file mode 100644 (file)
index 0000000..22544b2
--- /dev/null
@@ -0,0 +1,252 @@
+Contour Features {#tutorial_js_contour_features}
+================
+
+Goal
+----
+
+-   To find the different features of contours, like area, perimeter, centroid, bounding box etc
+-   You will learn plenty of functions related to contours.
+
+1. Moments
+----------
+
+Image moments help you to calculate some features like the center of mass of an object, the area of the
+object etc. Check out the Wikipedia page on [Image
+Moments](http://en.wikipedia.org/wiki/Image_moment).
+
+We use the function: **cv.moments (array, binaryImage = false)**
+@param array         raster image (single-channel, 8-bit or floating-point 2D array) or an array ( 1×N or N×1 ) of 2D points.
+@param binaryImage   if it is true, all non-zero image pixels are treated as 1's. The parameter is used for images only.
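+
+Below is a minimal sketch, assuming cnt is a contour Mat obtained from cv.findContours.
+@code{.js}
+let M = cv.moments(cnt, false);
+@endcode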
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_moments.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+From these moments, you can extract useful data like area, centroid etc. The centroid is given by the
+relations \f$C_x = \frac{M_{10}}{M_{00}}\f$ and \f$C_y = \frac{M_{01}}{M_{00}}\f$. This can be done as
+follows:
+@code{.js}
+let cx = M.m10 / M.m00;
+let cy = M.m01 / M.m00;
+@endcode
+
+2. Contour Area
+---------------
+
+Contour area is given by the function **cv.contourArea()** or from the moments, **M.m00**.
+
+We use the function: **cv.contourArea (contour, oriented = false)**
+@param contour    input vector of 2D points (contour vertices)
+@param oriented   oriented area flag. If it is true, the function returns a signed area value, depending on the contour orientation (clockwise or counter-clockwise). Using this feature you can determine orientation of a contour by taking the sign of an area. By default, the parameter is false, which means that the absolute value is returned.
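+
+Below is a minimal sketch, assuming cnt is a contour Mat.
+@code{.js}
+let area = cv.contourArea(cnt, false);
+@endcode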
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_area.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+3. Contour Perimeter
+--------------------
+
+It is also called arc length. It can be found using the **cv.arcLength()** function.
+
+We use the function: **cv.arcLength (curve, closed)**
+@param curve    input vector of 2D points.
+@param closed   flag indicating whether the curve is closed or not.
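+
+Below is a minimal sketch, assuming cnt is the contour Mat of a closed contour.
+@code{.js}
+let perimeter = cv.arcLength(cnt, true);
+@endcode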
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_perimeter.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+4. Contour Approximation
+------------------------
+
+It approximates a contour shape to another shape with fewer vertices, depending upon the
+precision we specify. It is an implementation of the [Douglas-Peucker
+algorithm](http://en.wikipedia.org/wiki/Ramer-Douglas-Peucker_algorithm). Check the Wikipedia page
+for the algorithm and a demonstration.
+
+We use the function: **cv.approxPolyDP (curve, approxCurve, epsilon, closed)**
+@param curve        input vector of 2D points stored in cv.Mat.
+@param approxCurve  result of the approximation. The type should match the type of the input curve.
+@param epsilon      parameter specifying the approximation accuracy. This is the maximum distance between the original curve and its approximation.
+@param closed       If true, the approximated curve is closed (its first and last vertices are connected). Otherwise, it is not closed.
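+
+Below is a minimal sketch, assuming cnt is a contour Mat; the epsilon of 1% of
+the arc length is an illustration-only choice.
+@code{.js}
+let approx = new cv.Mat();
+let epsilon = 0.01 * cv.arcLength(cnt, true);
+cv.approxPolyDP(cnt, approx, epsilon, true);
+approx.delete();
+@endcode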
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_approxPolyDP.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+5. Convex Hull
+--------------
+
+The convex hull will look similar to contour approximation, but it is not (both may provide the same results
+in some cases). Here, the **cv.convexHull()** function checks a curve for convexity defects and
+corrects it. Generally speaking, convex curves are curves which always bulge out, or are
+at least flat. Where a curve bulges inward, it is called a convexity defect. For example, check the
+image of the hand below. The red line shows the convex hull of the hand. The double-sided arrow marks show the
+convexity defects, which are the local maximum deviations of the hull from the contour.
+
+![image](images/convexitydefects.jpg)
+
+We use the function: **cv.convexHull (points, hull, clockwise = false, returnPoints = true)**
+@param points        input 2D point set.
+@param hull          output convex hull.
+@param clockwise     orientation flag. If it is true, the output convex hull is oriented clockwise. Otherwise, it is oriented counter-clockwise. The assumed coordinate system has its X axis pointing to the right, and its Y axis pointing upwards.
+@param returnPoints  operation flag. In case of a matrix, when the flag is true, the function returns convex hull points. Otherwise, it returns indices of the convex hull points.
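+
+Below is a minimal sketch, assuming cnt is a contour Mat.
+@code{.js}
+let hull = new cv.Mat();
+cv.convexHull(cnt, hull, false, true);
+hull.delete();
+@endcode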
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_convexHull.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+6. Checking Convexity
+---------------------
+
+There is a function to check if a curve is convex or not, **cv.isContourConvex()**. It just returns
+true or false. Not a big deal.
+
+@code{.js}
+cv.isContourConvex(cnt);
+@endcode
+
+7. Bounding Rectangle
+---------------------
+
+There are two types of bounding rectangles.
+
+### 7.a. Straight Bounding Rectangle
+
+It is a straight rectangle that doesn't consider the rotation of the object, so the area of the
+bounding rectangle won't be minimal.
+
+We use the function: **cv.boundingRect (points)**
+@param points        input 2D point set.
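+
+Below is a minimal sketch, assuming cnt is a contour Mat and dst is the image to
+draw on; the color is an illustration-only choice.
+@code{.js}
+let rect = cv.boundingRect(cnt);
+let point1 = new cv.Point(rect.x, rect.y);
+let point2 = new cv.Point(rect.x + rect.width, rect.y + rect.height);
+cv.rectangle(dst, point1, point2, new cv.Scalar(0, 255, 0, 255), 2, cv.LINE_AA, 0);
+@endcode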
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_boundingRect.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 7.b. Rotated Rectangle
+
+Here, the bounding rectangle is drawn with minimum area, so it considers the rotation as well.
+
+We use the function: **cv.minAreaRect (points)**
+@param points        input 2D point set.
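+
+Below is a minimal sketch, assuming cnt is a contour Mat and dst is the image to
+draw on.
+@code{.js}
+let rotatedRect = cv.minAreaRect(cnt);
+let vertices = cv.RotatedRect.points(rotatedRect);
+// Draw the rotated rectangle edge by edge.
+for (let i = 0; i < 4; i++) {
+    cv.line(dst, vertices[i], vertices[(i + 1) % 4], new cv.Scalar(255, 0, 0, 255), 2, cv.LINE_AA, 0);
+}
+@endcode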
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_minAreaRect.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+8. Minimum Enclosing Circle
+---------------------------
+
+Next we find the circumcircle of an object using the function **cv.minEnclosingCircle()**. It is a
+circle which completely covers the object with minimum area.
+
+We use the functions: **cv.minEnclosingCircle (points)**
+@param points        input 2D point set.
+
+**cv.circle (img, center, radius, color, thickness = 1, lineType = cv.LINE_8, shift = 0)**
+@param img          image where the circle is drawn.
+@param center       center of the circle.
+@param radius       radius of the circle.
+@param color        circle color.
+@param thickness    thickness of the circle outline, if positive. Negative thickness means that a filled circle is to be drawn.
+@param lineType     type of the circle boundary.
+@param shift        number of fractional bits in the coordinates of the center and in the radius value.
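+
+Below is a minimal sketch, assuming cnt is a contour Mat and dst is the image to
+draw on.
+@code{.js}
+let circle = cv.minEnclosingCircle(cnt);
+cv.circle(dst, circle.center, circle.radius, new cv.Scalar(255, 0, 0, 255), 2, cv.LINE_AA, 0);
+@endcode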
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_minEnclosingCircle.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+9. Fitting an Ellipse
+---------------------
+
+Next, we fit an ellipse to an object. It returns the rotated rectangle in which the ellipse is
+inscribed.
+
+We use the functions: **cv.fitEllipse (points)**
+@param points        input 2D point set.
+
+**cv.ellipse1 (img, box, color, thickness = 1, lineType = cv.LINE_8)**
+@param img        image.
+@param box        alternative ellipse representation via RotatedRect. This means that the function draws an ellipse inscribed in the rotated rectangle.
+@param color      ellipse color.
+@param thickness  thickness of the ellipse arc outline, if positive. Otherwise, this indicates that a filled ellipse sector is to be drawn.
+@param lineType   type of the ellipse boundary.
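+
+Below is a minimal sketch, assuming cnt is a contour Mat and dst is the image to
+draw on.
+@code{.js}
+let rotatedRect = cv.fitEllipse(cnt);
+cv.ellipse1(dst, rotatedRect, new cv.Scalar(255, 0, 0, 255), 2, cv.LINE_8);
+@endcode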
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_fitEllipse.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+10. Fitting a Line
+------------------
+
+Similarly, we can fit a line to a set of points, approximating them with a straight line.
+
+We use the functions: **cv.fitLine (points, line, distType, param, reps, aeps)**
+@param points     input 2D point set.
+@param line       output line parameters. It should be a Mat of 4 elements [vx, vy, x0, y0], where [vx, vy] is a normalized vector collinear to the line and [x0, y0] is a point on the line.
+@param distType   distance used by the M-estimator (see cv.DistanceTypes).
+@param param      numerical parameter ( C ) for some types of distances. If it is 0, an optimal value is chosen.
+@param reps       sufficient accuracy for the radius (distance between the coordinate origin and the line).
+@param aeps       sufficient accuracy for the angle. 0.01 would be a good default value for reps and aeps.
+
+**cv.line (img, pt1, pt2, color, thickness = 1, lineType = cv.LINE_8, shift = 0)**
+@param img          image.
+@param pt1          first point of the line segment.
+@param pt2          second point of the line segment.
+@param color        line color.
+@param thickness    line thickness.
+@param lineType     type of the line.
+@param shift        number of fractional bits in the point coordinates.
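+
+Below is a minimal sketch, assuming cnt is a contour Mat.
+@code{.js}
+let line = new cv.Mat();
+cv.fitLine(cnt, line, cv.DIST_L2, 0, 0.01, 0.01);
+// line now holds [vx, vy, x0, y0] as 32-bit floats.
+let vx = line.data32F[0], vy = line.data32F[1];
+let x0 = line.data32F[2], y0 = line.data32F[3];
+line.delete();
+@endcode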
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contour_features_fitLine.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contour_properties/js_contour_properties.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contour_properties/js_contour_properties.markdown
new file mode 100644 (file)
index 0000000..1dbb15c
--- /dev/null
@@ -0,0 +1,110 @@
+Contour Properties {#tutorial_js_contour_properties}
+==================
+
+Goal
+----
+
+-   Here we will learn to extract some frequently used properties of objects like Solidity, Equivalent
+Diameter, Mask image, Mean Intensity etc.
+
+1. Aspect Ratio
+---------------
+
+It is the ratio of width to height of the bounding rect of the object.
+
+\f[Aspect \; Ratio = \frac{Width}{Height}\f]
+@code{.js}
+let rect = cv.boundingRect(cnt);
+let aspectRatio = rect.width / rect.height;
+@endcode
+
+2. Extent
+---------
+
+Extent is the ratio of contour area to bounding rectangle area.
+
+\f[Extent = \frac{Object \; Area}{Bounding \; Rectangle \; Area}\f]
+@code{.js}
+let area = cv.contourArea(cnt, false);
+let rect = cv.boundingRect(cnt);
+let rectArea = rect.width * rect.height;
+let extent = area / rectArea;
+@endcode
+
+3. Solidity
+-----------
+
+Solidity is the ratio of contour area to its convex hull area.
+
+\f[Solidity = \frac{Contour \; Area}{Convex \; Hull \; Area}\f]
+@code{.js}
+let area = cv.contourArea(cnt, false);
+cv.convexHull(cnt, hull, false, true);
+let hullArea = cv.contourArea(hull, false);
+let solidity = area / hullArea;
+@endcode
+
+4. Equivalent Diameter
+----------------------
+
+Equivalent Diameter is the diameter of the circle whose area is the same as the contour area.
+
+\f[Equivalent \; Diameter = \sqrt{\frac{4 \times Contour \; Area}{\pi}}\f]
+@code{.js}
+let area = cv.contourArea(cnt, false);
+let equiDiameter = Math.sqrt(4 * area / Math.PI);
+@endcode
+
+5. Orientation
+--------------
+
+Orientation is the angle at which the object is directed. The following method also gives the major
+axis and minor axis lengths.
+@code{.js}
+let rotatedRect = cv.fitEllipse(cnt);
+let angle = rotatedRect.angle;
+@endcode
+
+6. Mask and Pixel Points
+------------------------
+
+In some cases, we may need all the points that comprise the object.
+
+We use the function: **cv.transpose (src, dst)**
+@param src   input array.
+@param dst   output array of the same type as src.
+
+\htmlonly
+<iframe src="../../js_contour_properties_transpose.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+7. Maximum Value, Minimum Value and their locations
+---------------------------------------------------
+
+We use the function: **cv.minMaxLoc(src, mask)**
+@param src      input single-channel array.
+@param mask     optional mask used to select a sub-array.
+
+@code{.js}
+let result = cv.minMaxLoc(src, mask);
+let minVal = result.minVal;
+let maxVal = result.maxVal;
+let minLoc = result.minLoc;
+let maxLoc = result.maxLoc;
+@endcode
+
+8. Mean Color or Mean Intensity
+-------------------------------
+
+Here, we can find the average color of an object. Or it can be the average intensity of the object in
+grayscale mode. We again use the same mask to do it.
+
+We use the function: **cv.mean (src, mask)**
+@param src   input array that should have from 1 to 4 channels so that the result can be stored in Scalar.
+@param mask  optional operation mask.
+
+@code{.js}
+let average = cv.mean(src, mask);
+@endcode
diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contours_begin/js_contours_begin.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contours_begin/js_contours_begin.markdown
new file mode 100644 (file)
index 0000000..48eb92b
--- /dev/null
@@ -0,0 +1,72 @@
+Contours : Getting Started {#tutorial_js_contours_begin}
+==========================
+
+Goal
+----
+
+-   Understand what contours are.
+-   Learn to find contours, draw contours etc
+-   You will learn these functions : **cv.findContours()**, **cv.drawContours()**
+
+What are contours?
+------------------
+
+Contours can be explained simply as a curve joining all the continuous points (along the boundary)
+having the same color or intensity. Contours are a useful tool for shape analysis and object
+detection and recognition.
+
+-   For better accuracy, use binary images. So before finding contours, apply threshold or canny
+    edge detection.
+-   Since OpenCV 3.2, the source image is not modified by this function.
+-   In OpenCV, finding contours is like finding a white object on a black background. So remember,
+    the object to be found should be white and the background should be black.
+
+How to draw the contours?
+-------------------------
+
+To draw the contours, the cv.drawContours function is used. It can also be used to draw any shape
+provided you have its boundary points.
+
+We use the functions: **cv.findContours (image, contours, hierarchy, mode, method, offset = new cv.Point(0, 0))**
+@param image         source, an 8-bit single-channel image. Non-zero pixels are treated as 1's. Zero pixels remain 0's, so the image is treated as binary.
+@param contours      detected contours.
+@param hierarchy     optional output containing information about the image topology. It has as many elements as the number of contours.
+@param mode          contour retrieval mode(see cv.RetrievalModes).
+@param method        contour approximation method(see cv.ContourApproximationModes).
+@param offset        optional offset by which every contour point is shifted. This is useful if the contours are extracted from the image ROI and then they should be analyzed in the whole image context.
+
+**cv.drawContours (image, contours, contourIdx, color, thickness = 1, lineType = cv.LINE_8, hierarchy = new cv.Mat(), maxLevel = INT_MAX, offset = new cv.Point(0, 0))**
+@param image         destination image.
+@param contours      all the input contours.
+@param contourIdx    parameter indicating a contour to draw. If it is negative, all the contours are drawn.
+@param color         color of the contours.
+@param thickness     thickness of lines the contours are drawn with. If it is negative, the contour interiors are drawn.
+@param lineType      line connectivity(see cv.LineTypes).
+@param hierarchy     optional information about hierarchy. It is only needed if you want to draw only some of the contours(see maxLevel).
+@param maxLevel      maximal level for drawn contours. If it is 0, only the specified contour is drawn. If it is 1, the function draws the contour(s) and all the nested contours. If it is 2, the function draws the contours, all the nested contours, all the nested-to-nested contours, and so on. This parameter is only taken into account when there is hierarchy available.
+@param offset        optional contour shift parameter.
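+
+Below is a minimal sketch, assuming a canvas with id "canvasInput" exists; the
+threshold values, retrieval mode and color are illustration-only choices.
+@code{.js}
+let src = cv.imread("canvasInput");
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 120, 200, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+// Draw every detected contour on a black canvas.
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+for (let i = 0; i < contours.size(); ++i) {
+    cv.drawContours(dst, contours, i, new cv.Scalar(255, 0, 0), 1, cv.LINE_8, hierarchy, 100);
+}
+cv.imshow("canvasOutput", dst);
+src.delete(); dst.delete(); contours.delete(); hierarchy.delete();
+@endcode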
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contours_begin_contours.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Contour Approximation Method
+============================
+
+This is the fifth argument of the cv.findContours function. What does it actually denote?
+
+Above, we said that contours are the boundaries of a shape with the same intensity. The function stores the (x,y)
+coordinates of the boundary of a shape. But does it store all the coordinates? That is specified by
+this contour approximation method.
+
+If you pass cv.CHAIN_APPROX_NONE, all the boundary points are stored. But do we actually need all
+the points? For example, say you found the contour of a straight line. Do you need all the points on the line
+to represent that line? No, we need just the two end points of that line. This is what
+cv.CHAIN_APPROX_SIMPLE does. It removes all redundant points and compresses the contour, thereby
+saving memory.
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contours_hierarchy/js_contours_hierarchy.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contours_hierarchy/js_contours_hierarchy.markdown
new file mode 100644 (file)
index 0000000..5dc8077
--- /dev/null
@@ -0,0 +1,158 @@
+Contours Hierarchy {#tutorial_js_contours_hierarchy}
+==================
+
+Goal
+----
+
+-   This time, we learn about the hierarchy of contours, i.e. the parent-child relationship in Contours.
+
+Theory
+------
+
+In the last few articles on contours, we have worked with several functions related to contours
+provided by OpenCV. But when we found the contours in an image using the **cv.findContours()** function,
+we passed an argument, the **Contour Retrieval Mode**. We usually passed **cv.RETR_LIST** or
+**cv.RETR_TREE** and it worked nicely. But what does it actually mean?
+
+Also, in the output, we got two results: our contours, and one more
+output which we named **hierarchy** (please check the code in the previous articles). But we
+never used this hierarchy anywhere. So what is this hierarchy and what is it for? What is its
+relationship with the previously mentioned function argument?
+
+That is what we are going to deal with in this article.
+
+### What is Hierarchy?
+
+Normally we use the **cv.findContours()** function to detect objects in an image. Sometimes
+objects are in different locations. But in some cases, some shapes are inside other shapes, just
+like nested figures. In this case, we call the outer one the **parent** and the inner one the **child**. This
+way, contours in an image have some relationship to each other. And we can specify how one contour is
+connected to another, like, is it a child of some other contour, or is it a parent, etc.
+The representation of this relationship is called the **Hierarchy**.
+
+Consider an example image below :
+
+![image](images/hierarchy.png)
+
+In this image, there are a few shapes which I have numbered from **0-5**. *2 and 2a* denote the
+external and internal contours of the outermost box.
+
+Here, contours 0, 1 and 2 are **external or outermost**. We can say they are in **hierarchy-0**, or
+simply that they are in the **same hierarchy level**.
+
+Next comes **contour-2a**. It can be considered a **child of contour-2** (or, the other way around,
+contour-2 is the parent of contour-2a). So let it be in **hierarchy-1**. Similarly contour-3 is a child of
+contour-2a and it comes in the next hierarchy. Finally contours 4 and 5 are the children of contour-3a, and
+they come in the last hierarchy level. From the way I numbered the boxes, I would say contour-4 is
+the first child of contour-3a (it could be contour-5 as well).
+
+I mentioned these things so that you understand terms like **same hierarchy level**, **external contour**,
+**child contour**, **parent contour**, **first child** etc. Now let's get into OpenCV.
+
+### Hierarchy Representation in OpenCV
+
+So each contour has its own information regarding which hierarchy level it is in, who its child is, who its
+parent is etc. OpenCV represents it as an array of four values: **[Next, Previous, First_Child,
+Parent]**
+
+<center>*"Next denotes next contour at the same hierarchical level."*</center>
+
+For example, take contour-0 in our picture. Which is the next contour at its level? It is contour-1. So
+simply put Next = 1. Similarly for contour-1, the next is contour-2. So Next = 2.
+
+What about contour-2? There is no next contour at the same level. So simply put Next = -1. What
+about contour-4? It is at the same level as contour-5. So its next contour is contour-5, so Next = 5.
+
+<center>*"Previous denotes previous contour at the same hierarchical level."*</center>
+
+It is the same as above. The previous contour of contour-1 is contour-0 at the same level. Similarly for
+contour-2, it is contour-1. And for contour-0, there is no previous contour, so put it as -1.
+
+<center>*"First_Child denotes its first child contour."*</center>
+
+There is no need for any explanation. For contour-2, the child is contour-2a, so it gets the
+corresponding index value of contour-2a. What about contour-3a? It has two children. But we take
+only the first child, and it is contour-4. So First_Child = 4 for contour-3a.
+
+<center>*"Parent denotes index of its parent contour."*</center>
+
+It is just the opposite of **First_Child**. For both contour-4 and contour-5, the parent contour is
+contour-3a. For contour-3a, it is contour-3, and so on.
+
+@note If there is no child or parent, that field is taken as -1.
+
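+Below is a minimal sketch of how to read these four values in OpenCV.js,
+assuming hierarchy is the Mat filled by cv.findContours (a CV_32SC4 Mat with one
+[Next, Previous, First_Child, Parent] entry per contour).
+@code{.js}
+let i = 0; // index of the contour to inspect
+let next = hierarchy.data32S[i * 4];
+let previous = hierarchy.data32S[i * 4 + 1];
+let firstChild = hierarchy.data32S[i * 4 + 2];
+let parent = hierarchy.data32S[i * 4 + 3];
+@endcode
+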
+Now that we know about the hierarchy style used in OpenCV, we can look into the Contour Retrieval Modes
+with the help of the same image given above, i.e. what do flags like cv.RETR_LIST,
+cv.RETR_TREE, cv.RETR_CCOMP, cv.RETR_EXTERNAL etc. mean?
+
+Contour Retrieval Mode
+----------------------
+
+### 1. RETR_LIST
+
+This is the simplest of the four flags (from an explanation point of view). It simply retrieves all the
+contours, but doesn't create any parent-child relationships. **Parents and kids are equal under this
+rule, and they are just contours**, i.e. they all belong to the same hierarchy level.
+
+So here, the 3rd and 4th terms in the hierarchy array are always -1. But obviously, the Next and Previous
+terms will have their corresponding values.
+
+### 2. RETR_EXTERNAL
+
+If you use this flag, it returns only the extreme outer contours. All child contours are left behind. **We
+can say, under this law, only the eldest in every family is taken care of. It doesn't care about
+other members of the family**.
+
+
+### 3. RETR_CCOMP
+
+This flag retrieves all the contours and arranges them into a 2-level hierarchy, i.e. the external contours
+of the object (i.e. its boundary) are placed in hierarchy-1, and the contours of holes inside the object
+(if any) are placed in hierarchy-2. If there is any object inside a hole, its contour is placed again in
+hierarchy-1 only, and its hole in hierarchy-2, and so on.
+
+Just consider the image of a "big white zero" on a black background. The outer circle of the zero belongs to
+the first hierarchy, and the inner circle of the zero belongs to the second hierarchy.
+
+We can explain it with a simple image. Here I have labelled the order of the contours in red and
+the hierarchy they belong to in green (either 1 or 2). The order is the same as the order in which OpenCV
+detects contours.
+
+![image](images/ccomp_hierarchy.png)
+
+So consider the first contour, i.e. contour-0. It is in hierarchy-1. It has two holes, contours 1 and 2,
+and they belong to hierarchy-2. So for contour-0, the next contour in the same hierarchy level is contour-3.
+There is no previous one. Its first child is contour-1 in hierarchy-2. It has no parent,
+because it is in hierarchy-1. So its hierarchy array is [3,-1,1,-1].
+
+Now take contour-1. It is in hierarchy-2. The next one in the same hierarchy (under the parenthood of
+contour-0) is contour-2. No previous one. No child, but the parent is contour-0. So the array is
+[2,-1,-1,0].
+
+Similarly contour-2: it is in hierarchy-2. There is no next contour in the same hierarchy under
+contour-0. So no Next. The previous is contour-1. No child, the parent is contour-0. So the array is
+[-1,1,-1,0].
+
+Contour-3: the next contour in hierarchy-1 is contour-5. The previous is contour-0. The child is contour-4
+and there is no parent. So the array is [5,0,4,-1].
+
+Contour-4: it is in hierarchy-2 under contour-3 and it has no sibling. So no next, no previous, no
+child; the parent is contour-3. So the array is [-1,-1,-1,3].
+
+
+### 4. RETR_TREE
+
+And this is the final guy, Mr. Perfect. It retrieves all the contours and creates a full family
+hierarchy list. **It even tells who is the grandpa, father, son, grandson and even beyond... :)**
+
+For example, I took the above image, rewrote the code for cv.RETR_TREE, reordered the contours as per the
+result given by OpenCV and analyzed it. Again, red letters give the contour number and green letters
+give the hierarchy order.
+
+![image](images/tree_hierarchy.png)
+
+Take contour-0: it is in hierarchy-0. The next contour in the same hierarchy is contour-7. No previous
+contours. The child is contour-1. And no parent. So the array is [7,-1,1,-1].
+
+Take contour-1: it is in hierarchy-1. There is no contour at the same level and no previous one. The child
+is contour-2. The parent is contour-0. So the array is [-1,-1,2,0].
diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contours_more_functions/js_contours_more_functions.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contours_more_functions/js_contours_more_functions.markdown
new file mode 100644 (file)
index 0000000..b753116
--- /dev/null
@@ -0,0 +1,72 @@
+Contours : More Functions {#tutorial_js_contours_more_functions}
+=========================
+
+Goal
+----
+
+-   Convexity defects and how to find them.
+-   Finding shortest distance from a point to a polygon
+-   Matching different shapes
+
+Theory and Code
+---------------
+
+### 1. Convexity Defects
+
+We saw what a convex hull is in the second chapter about contours. Any deviation of the object from this
+hull can be considered a convexity defect. We can visualize it using an image: we draw a
+line joining the start point and the end point, then draw a circle at the farthest point.
+
+@note Remember we have to pass returnPoints = false while finding the convex hull, in order to find
+convexity defects.
+
+We use the function: **cv.convexityDefects (contour, convexhull, convexityDefect)**
+@param contour              input contour.
+@param convexhull           convex hull obtained using convexHull that should contain indices of the contour points that make the hull.
+@param convexityDefect      the output vector of convexity defects. Each convexity defect is represented as a 4-element vector (start_index, end_index, farthest_pt_index, fixpt_depth), where indices are 0-based indices in the original contour of the convexity defect beginning, end and the farthest point, and fixpt_depth is a fixed-point approximation (with 8 fractional bits) of the distance between the farthest contour point and the hull. That is, the floating-point value of the depth is fixpt_depth/256.0.
+
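+A minimal sketch of the whole pipeline (assuming an input canvas with the hypothetical id 'canvasInput'):
+
+@code{.js}
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.threshold(src, src, 100, 255, cv.THRESH_BINARY);
+let contours = new cv.MatVector();
+let hierarchy = new cv.Mat();
+cv.findContours(src, contours, hierarchy, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE);
+let cnt = contours.get(0);
+let hull = new cv.Mat();
+let defects = new cv.Mat();
+cv.convexHull(cnt, hull, false, false); // returnPoints = false, so hull holds indices
+cv.convexityDefects(cnt, hull, defects);
+// each row of defects holds (start_index, end_index, farthest_pt_index, fixpt_depth)
+@endcode
+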
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contours_more_functions_convexityDefects.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 2. Point Polygon Test
+
+This function finds the shortest distance between a point in the image and a contour. It returns a
+distance which is negative when the point is outside the contour, positive when the point is inside, and zero
+if the point is on the contour.
+
+We use the function: **cv.pointPolygonTest (contour, pt, measureDist)**
+@param contour      input contour.
+@param pt           point tested against the contour.
+@param measureDist  if true, the function estimates the signed distance from the point to the nearest contour edge. Otherwise, the function only checks if the point is inside a contour or not.
+
+@code{.js}
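+// cnt is assumed to be a contour obtained from cv.findContours
+// signed distance (measureDist = true) from the point (50, 50) to the contour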
+let dist = cv.pointPolygonTest(cnt, new cv.Point(50, 50), true);
+@endcode
+
+### 3. Match Shapes
+
+OpenCV comes with a function **cv.matchShapes()** which enables us to compare two shapes, or two
+contours, and returns a metric showing the similarity. The lower the result, the better the match.
+It is calculated based on the Hu moment values. The different measurement methods are explained in the
+docs.
+
+We use the function: **cv.matchShapes (contour1, contour2, method, parameter)**
+@param contour1      first contour or grayscale image.
+@param contour2      second contour or grayscale image.
+@param method        comparison method, see cv::ShapeMatchModes
+@param parameter     method-specific parameter(not supported now).
+
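+A minimal sketch (assuming cnt1 and cnt2 are contours already obtained via cv.findContours; both names are hypothetical):
+
+@code{.js}
+// method 1 corresponds to CONTOURS_MATCH_I1; a smaller result means a better match
+let similarity = cv.matchShapes(cnt1, cnt2, 1, 0);
+@endcode
+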
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_contours_more_functions_shape.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_table_of_contents_contours.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_table_of_contents_contours.markdown
new file mode 100644 (file)
index 0000000..73f6f07
--- /dev/null
@@ -0,0 +1,26 @@
+Contours in OpenCV.js {#tutorial_js_table_of_contents_contours}
+==================
+
+-   @subpage tutorial_js_contours_begin
+
+    Learn to find and draw Contours.
+
+-   @subpage tutorial_js_contour_features
+
+    Learn to find different features of contours like area, perimeter, bounding rectangle etc.
+
+-   @subpage tutorial_js_contour_properties
+
+    Learn to find different properties of contours like Solidity, Mean Intensity etc.
+
+-   @subpage tutorial_js_contours_more_functions
+
+    Learn to find convexity defects, pointPolygonTest, match different shapes etc.
+
+-   @subpage tutorial_js_contours_hierarchy
+
+    Learn about Contour Hierarchy
diff --git a/doc/js_tutorials/js_imgproc/js_filtering/js_filtering.markdown b/doc/js_tutorials/js_imgproc/js_filtering/js_filtering.markdown
new file mode 100644 (file)
index 0000000..8ac7eca
--- /dev/null
@@ -0,0 +1,163 @@
+Smoothing Images {#tutorial_js_filtering}
+================
+
+Goals
+-----
+
+-   Blur the images with various low pass filters
+-   Apply custom-made filters to images (2D convolution)
+
+2D Convolution ( Image Filtering )
+----------------------------------
+
+As in one-dimensional signals, images can also be filtered with various low-pass filters (LPF),
+high-pass filters (HPF), etc. An LPF helps in removing noise and blurring images, while an HPF helps
+in finding edges in the images.
+
+OpenCV provides a function **cv.filter2D()** to convolve a kernel with an image. As an example, we
+will try an averaging filter on an image. A 5x5 averaging filter kernel will look like below:
+
+\f[K =  \frac{1}{25} \begin{bmatrix} 1 & 1 & 1 & 1 & 1  \\ 1 & 1 & 1 & 1 & 1 \\ 1 & 1 & 1 & 1 & 1 \\ 1 & 1 & 1 & 1 & 1 \\ 1 & 1 & 1 & 1 & 1 \end{bmatrix}\f]
+
+We use the functions: **cv.filter2D (src, dst, ddepth, kernel, anchor = new cv.Point(-1, -1), delta = 0, borderType = cv.BORDER_DEFAULT)**
+@param src         input image.
+@param dst         output image of the same size and the same number of channels as src.
+@param ddepth      desired depth of the destination image.
+@param kernel      convolution kernel (or rather a correlation kernel), a single-channel floating point matrix; if you want to apply different kernels to different channels, split the image into separate color planes using split and process them individually.
+@param anchor      anchor of the kernel that indicates the relative position of a filtered point within the kernel; the anchor should lie within the kernel; default value new cv.Point(-1, -1) means that the anchor is at the kernel center.
+@param delta       optional value added to the filtered pixels before storing them in dst.
+@param borderType  pixel extrapolation method(see cv.BorderTypes).
+
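+For instance, a minimal sketch applying the 5x5 averaging kernel above (assuming canvases with the hypothetical ids 'canvasInput' and 'canvasOutput'):
+
+@code{.js}
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// 5x5 averaging kernel: every entry is 1/25
+let K = cv.matFromArray(5, 5, cv.CV_32FC1, new Array(25).fill(1 / 25));
+cv.filter2D(src, dst, cv.CV_8U, K, new cv.Point(-1, -1), 0, cv.BORDER_DEFAULT);
+cv.imshow('canvasOutput', dst);
+src.delete(); dst.delete(); K.delete();
+@endcode
+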
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_filtering_filter.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Image Blurring (Image Smoothing)
+--------------------------------
+
+Image blurring is achieved by convolving the image with a low-pass filter kernel. It is useful for
+removing noise. It actually removes high frequency content (eg: noise, edges) from the image, so
+edges are blurred a little bit in this operation. (Well, there are blurring techniques which don't
+blur the edges.) OpenCV provides mainly four types of blurring techniques.
+
+### 1. Averaging
+
+This is done by convolving the image with a normalized box filter. It simply takes the average of all
+the pixels under the kernel area and replaces the central element. This is done by the function
+**cv.blur()** or **cv.boxFilter()**. Check the docs for more details about the kernel. We should
+specify the width and height of the kernel. A 3x3 normalized box filter would look like below:
+
+\f[K =  \frac{1}{9} \begin{bmatrix} 1 & 1 & 1  \\ 1 & 1 & 1 \\ 1 & 1 & 1 \end{bmatrix}\f]
+
+We use the functions: **cv.blur (src, dst, ksize, anchor = new cv.Point(-1, -1), borderType = cv.BORDER_DEFAULT)**
+@param src         input image; it can have any number of channels, which are processed independently, but the depth should be CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
+@param dst         output image of the same size and type as src.
+@param ksize       blurring kernel size.
+@param anchor      anchor point; anchor = new cv.Point(-1, -1) means that the anchor is at the kernel center.
+@param borderType  border mode used to extrapolate pixels outside of the image(see cv.BorderTypes).
+
+**cv.boxFilter (src, dst, ddepth, ksize, anchor = new cv.Point(-1, -1), normalize = true, borderType = cv.BORDER_DEFAULT)**
+@param src         input image.
+@param dst         output image of the same size and type as src.
+@param ddepth      the output image depth (-1 to use src.depth()).
+@param ksize       blurring kernel size.
+@param anchor      anchor point; anchor = new cv.Point(-1, -1) means that the anchor is at the kernel center.
+@param normalize   flag, specifying whether the kernel is normalized by its area or not.
+@param borderType  border mode used to extrapolate pixels outside of the image(see cv.BorderTypes).
+
+@note If you don't want to use a normalized box filter, use **cv.boxFilter()** and pass the argument
+normalize = false to the function.
+
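+A minimal sketch (assuming src is an image already read with cv.imread):
+
+@code{.js}
+let dst = new cv.Mat();
+let ksize = new cv.Size(3, 3);
+// normalized box filter; for the unnormalized variant use
+// cv.boxFilter(src, dst, -1, ksize, new cv.Point(-1, -1), false, cv.BORDER_DEFAULT)
+cv.blur(src, dst, ksize, new cv.Point(-1, -1), cv.BORDER_DEFAULT);
+@endcode
+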
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_filtering_blur.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 2. Gaussian Blurring
+
+In this method, a Gaussian kernel is used instead of a box filter.
+
+We use the function: **cv.GaussianBlur (src, dst, ksize, sigmaX, sigmaY = 0, borderType = cv.BORDER_DEFAULT)**
+@param src         input image; the image can have any number of channels, which are processed independently, but the depth should be CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
+@param dst         output image of the same size and type as src.
+@param ksize       blurring kernel size.
+@param sigmaX      Gaussian kernel standard deviation in X direction.
+@param sigmaY      Gaussian kernel standard deviation in Y direction; if sigmaY is zero, it is set to be equal to sigmaX, if both sigmas are zeros, they are computed from ksize.width and ksize.height, to fully control the result regardless of possible future modifications of all this semantics, it is recommended to specify all of ksize, sigmaX, and sigmaY.
+@param borderType  pixel extrapolation method(see cv.BorderTypes).
+
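+A minimal sketch (again assuming src is an image already read with cv.imread):
+
+@code{.js}
+let dst = new cv.Mat();
+// 5x5 Gaussian kernel; sigmaX = sigmaY = 0 lets OpenCV derive the sigmas from ksize
+cv.GaussianBlur(src, dst, new cv.Size(5, 5), 0, 0, cv.BORDER_DEFAULT);
+@endcode
+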
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_filtering_GaussianBlur.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 3. Median Blurring
+
+Here, the function **cv.medianBlur()** takes the median of all the pixels under the kernel area, and the central
+element is replaced with this median value. This is highly effective against salt-and-pepper noise
+in images. The interesting thing is that, in the above filters, the central element is a newly
+calculated value which may be a pixel value in the image or a new value. But in median blurring,
+the central element is always replaced by some pixel value in the image. It reduces the noise
+effectively. Its kernel size should be a positive odd integer.
+
+We use the function: **cv.medianBlur (src, dst, ksize)**
+@param src         input 1, 3, or 4 channel image; when ksize is 3 or 5, the image depth should be cv.CV_8U, cv.CV_16U, or cv.CV_32F, for larger aperture sizes, it can only be cv.CV_8U.
+@param dst         destination array of the same size and type as src.
+@param ksize       aperture linear size; it must be odd and greater than 1, for example: 3, 5, 7 ...
+
+@note The median filter uses cv.BORDER_REPLICATE internally to cope with border pixels.
+
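+A minimal sketch (assuming src is an image already read with cv.imread):
+
+@code{.js}
+let dst = new cv.Mat();
+cv.medianBlur(src, dst, 5); // 5x5 aperture; ksize must be odd and greater than 1
+@endcode
+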
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_filtering_medianBlur.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 4. Bilateral Filtering
+
+**cv.bilateralFilter()** is highly effective in noise removal while keeping edges sharp. But the
+operation is slower compared to other filters. We already saw that the Gaussian filter takes a
+neighbourhood around the pixel and finds its Gaussian weighted average. This Gaussian filter is a
+function of space alone, that is, nearby pixels are considered while filtering. It doesn't consider
+whether pixels have almost the same intensity. It doesn't consider whether a pixel is an edge pixel or
+not. So it blurs the edges also, which we don't want to do.
+
+The bilateral filter also takes a Gaussian filter in space, but adds one more Gaussian filter which is a
+function of pixel difference. The Gaussian function of space makes sure only nearby pixels are considered
+for blurring, while the Gaussian function of intensity difference makes sure only those pixels with
+similar intensity to the central pixel are considered for blurring. So it preserves the edges, since
+pixels at edges will have large intensity variation.
+
+We use the function: **cv.bilateralFilter (src, dst, d, sigmaColor, sigmaSpace, borderType = cv.BORDER_DEFAULT)**
+@param src          source 8-bit or floating-point, 1-channel or 3-channel image.
+@param dst          output image of the same size and type as src.
+@param d            diameter of each pixel neighborhood that is used during filtering. If it is non-positive, it is computed from sigmaSpace.
+@param sigmaColor   filter sigma in the color space. A larger value of the parameter means that farther colors within the pixel neighborhood will be mixed together, resulting in larger areas of semi-equal color.
+@param sigmaSpace   filter sigma in the coordinate space. A larger value of the parameter means that farther pixels will influence each other as long as their colors are close enough. When d>0, it specifies the neighborhood size regardless of sigmaSpace. Otherwise, d is proportional to sigmaSpace.
+@param borderType   border mode used to extrapolate pixels outside of the image(see cv.BorderTypes).
+
+@note For simplicity, you can set the 2 sigma values to be the same. If they are small (< 10), the filter will not have much effect, whereas if they are large (> 150), they will have a very strong effect, making the image look "cartoonish". Large filters (d > 5) are very slow, so it is recommended to use d=5 for real-time applications, and perhaps d=9 for offline applications that need heavy noise filtering.
+
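+A minimal sketch (assuming src was read from a canvas, hence the RGBA-to-RGB conversion to get a 3-channel input):
+
+@code{.js}
+let dst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB, 0); // bilateralFilter needs 1 or 3 channels
+cv.bilateralFilter(src, dst, 9, 75, 75, cv.BORDER_DEFAULT);
+@endcode
+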
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_filtering_bilateralFilter.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_geometric_transformations/js_geometric_transformations.markdown b/doc/js_tutorials/js_imgproc/js_geometric_transformations/js_geometric_transformations.markdown
new file mode 100644 (file)
index 0000000..d565e6b
--- /dev/null
@@ -0,0 +1,145 @@
+Geometric Transformations of Images {#tutorial_js_geometric_transformations}
+===================================
+
+Goals
+-----
+
+-   Learn how to apply different geometric transformations to images, like translation, rotation, affine
+    transformation etc.
+-   You will learn these functions: **cv.resize**, **cv.warpAffine**, **cv.getAffineTransform** and **cv.warpPerspective**
+
+Transformations
+---------------
+
+
+### Scaling
+
+Scaling is just resizing of the image. OpenCV comes with a function **cv.resize()** for this
+purpose. The size of the image can be specified manually, or you can specify the scaling factor.
+Different interpolation methods are available. Preferable interpolation methods are **cv.INTER_AREA**
+for shrinking and **cv.INTER_CUBIC** (slow) & **cv.INTER_LINEAR** for zooming.
+
+We use the function: **cv.resize (src, dst, dsize, fx = 0, fy = 0, interpolation = cv.INTER_LINEAR)**
+@param src    input image
+@param dst    output image; it has the size dsize (when it is non-zero) or the size computed from src.size(), fx, and fy; the type of dst is the same as of src.
+@param dsize  output image size; if it equals zero, it is computed as:
+                 \f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f]
+                 Either dsize or both fx and fy must be non-zero.
+@param fx     scale factor along the horizontal axis; when it equals 0, it is computed as \f[\texttt{(double)dsize.width/src.cols}\f]
+@param fy     scale factor along the vertical axis; when it equals 0, it is computed as \f[\texttt{(double)dsize.height/src.rows}\f]
+@param interpolation    interpolation method(see **cv.InterpolationFlags**)
+
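+A minimal sketch (assuming an input canvas with the hypothetical id 'canvasInput'):
+
+@code{.js}
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// resize to a fixed 300x300 size; cv.INTER_AREA is preferable for shrinking
+cv.resize(src, dst, new cv.Size(300, 300), 0, 0, cv.INTER_AREA);
+@endcode
+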
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_geometric_transformations_resize.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### Translation
+
+Translation is the shifting of an object's location. If you know the shift in the (x,y) direction, let it
+be \f$(t_x,t_y)\f$, you can create the transformation matrix \f$\textbf{M}\f$ as follows:
+
+\f[M = \begin{bmatrix} 1 & 0 & t_x \\ 0 & 1 & t_y  \end{bmatrix}\f]
+
+We use the function: **cv.warpAffine (src, dst, M, dsize, flags = cv.INTER_LINEAR, borderMode = cv.BORDER_CONSTANT, borderValue = new cv.Scalar())**
+@param src          input image.
+@param dst          output image that has the size dsize and the same type as src.
+@param M            2 × 3 transformation matrix (cv.CV_64FC1 type).
+@param dsize        size of the output image.
+@param flags        combination of interpolation methods (see cv.InterpolationFlags) and the optional flag WARP_INVERSE_MAP that means that M is the inverse transformation (\f$\texttt{dst}\rightarrow\texttt{src}\f$).
+@param borderMode   pixel extrapolation method (see cv.BorderTypes); when borderMode = BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to the "outliers" in the source image are not modified by the function.
+@param borderValue  value used in case of a constant border; by default, it is 0.
+
+@note dsize should be in the form (width, height); remember that width = number of columns and height = number of rows.
+
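+A minimal sketch that shifts the image 50 pixels right and 100 pixels down (assuming an input canvas with the hypothetical id 'canvasInput'):
+
+@code{.js}
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+// M = [1, 0, tx; 0, 1, ty] with tx = 50, ty = 100
+let M = cv.matFromArray(2, 3, cv.CV_64FC1, [1, 0, 50, 0, 1, 100]);
+let dsize = new cv.Size(src.cols, src.rows);
+cv.warpAffine(src, dst, M, dsize, cv.INTER_LINEAR, cv.BORDER_CONSTANT, new cv.Scalar());
+@endcode
+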
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_geometric_transformations_warpAffine.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### Rotation
+
+Rotation of an image by an angle \f$\theta\f$ is achieved by a transformation matrix of the form
+
+\f[M = \begin{bmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{bmatrix}\f]
+
+But OpenCV provides scaled rotation with an adjustable center of rotation, so that you can rotate at any
+location you prefer. The modified transformation matrix is given by
+
+\f[\begin{bmatrix} \alpha &  \beta & (1- \alpha )  \cdot center.x -  \beta \cdot center.y \\ - \beta &  \alpha &  \beta \cdot center.x + (1- \alpha )  \cdot center.y \end{bmatrix}\f]
+
+where:
+
+\f[\begin{array}{l} \alpha =  scale \cdot \cos \theta , \\ \beta =  scale \cdot \sin \theta \end{array}\f]
+
+We use the function: **cv.getRotationMatrix2D (center, angle, scale)**
+@param center    center of the rotation in the source image.
+@param angle     rotation angle in degrees. Positive values mean counter-clockwise rotation (the coordinate origin is assumed to be the top-left corner).
+@param scale     isotropic scale factor.
+
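+A minimal sketch rotating around the image center (assuming src and dst as in the translation example above):
+
+@code{.js}
+let dsize = new cv.Size(src.cols, src.rows);
+let center = new cv.Point(src.cols / 2, src.rows / 2);
+// rotate 45 degrees counter-clockwise around the center, without scaling
+let M = cv.getRotationMatrix2D(center, 45, 1);
+cv.warpAffine(src, dst, M, dsize, cv.INTER_LINEAR, cv.BORDER_CONSTANT, new cv.Scalar());
+@endcode
+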
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_geometric_transformations_rotateWarpAffine.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### Affine Transformation
+
+In an affine transformation, all parallel lines in the original image will still be parallel in the
+output image. To find the transformation matrix, we need three points from the input image and their
+corresponding locations in the output image. Then **cv.getAffineTransform** will create a 2x3 matrix
+which is to be passed to **cv.warpAffine**.
+
+We use the function: **cv.getAffineTransform (src, dst)**
+
+@param src    three points ([3, 1] size and cv.CV_32FC2 type) from the input image.
+@param dst    three corresponding points ([3, 1] size and cv.CV_32FC2 type) in the output image.
+
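+A minimal sketch (the three point pairs are arbitrary example values; src and dst as before):
+
+@code{.js}
+let srcTri = cv.matFromArray(3, 1, cv.CV_32FC2, [56, 65, 368, 52, 28, 387]);
+let dstTri = cv.matFromArray(3, 1, cv.CV_32FC2, [10, 100, 300, 50, 100, 350]);
+let M = cv.getAffineTransform(srcTri, dstTri);
+cv.warpAffine(src, dst, M, new cv.Size(src.cols, src.rows));
+@endcode
+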
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_geometric_transformations_getAffineTransform.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### Perspective Transformation
+
+For perspective transformation, you need a 3x3 transformation matrix. Straight lines will remain straight even after the transformation. To find this transformation matrix, you need 4 points on the input image and the corresponding points on the output image. Among these 4 points, no 3 should be collinear. Then the transformation matrix can be found by the function **cv.getPerspectiveTransform**. Then apply **cv.warpPerspective** with this 3x3 transformation matrix.
+
+We use the functions: **cv.warpPerspective (src, dst, M, dsize, flags = cv.INTER_LINEAR, borderMode = cv.BORDER_CONSTANT, borderValue = new cv.Scalar())**
+
+@param src          input image.
+@param dst          output image that has the size dsize and the same type as src.
+@param M            3 × 3 transformation matrix (cv.CV_64FC1 type).
+@param dsize        size of the output image.
+@param flags        combination of interpolation methods (cv.INTER_LINEAR or cv.INTER_NEAREST) and the optional flag WARP_INVERSE_MAP, that sets M as the inverse transformation (\f$\texttt{dst}\rightarrow\texttt{src}\f$).
+@param borderMode   pixel extrapolation method (cv.BORDER_CONSTANT or cv.BORDER_REPLICATE).
+@param borderValue  value used in case of a constant border; by default, it is 0.
+
+**cv.getPerspectiveTransform (src, dst)**
+
+@param src          coordinates of quadrangle vertices in the source image.
+@param dst          coordinates of the corresponding quadrangle vertices in the destination image.
+
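+A minimal sketch mapping a quadrilateral to a 300x300 square (the corner coordinates are arbitrary example values):
+
+@code{.js}
+let srcPts = cv.matFromArray(4, 1, cv.CV_32FC2, [56, 65, 368, 52, 28, 387, 389, 390]);
+let dstPts = cv.matFromArray(4, 1, cv.CV_32FC2, [0, 0, 300, 0, 0, 300, 300, 300]);
+let M = cv.getPerspectiveTransform(srcPts, dstPts);
+cv.warpPerspective(src, dst, M, new cv.Size(300, 300));
+@endcode
+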
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_geometric_transformations_warpPerspective.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_grabcut/js_grabcut.markdown b/doc/js_tutorials/js_imgproc/js_grabcut/js_grabcut.markdown
new file mode 100644 (file)
index 0000000..570a490
--- /dev/null
@@ -0,0 +1,76 @@
+Foreground Extraction using GrabCut Algorithm {#tutorial_js_grabcut}
+=========================================================
+
+Goal
+----
+
+-   We will learn the GrabCut algorithm to extract the foreground in images
+
+Theory
+------
+
+The GrabCut algorithm was designed by Carsten Rother, Vladimir Kolmogorov & Andrew Blake from Microsoft
+Research Cambridge, UK, in their paper ["GrabCut": interactive foreground extraction using iterated
+graph cuts](http://dl.acm.org/citation.cfm?id=1015720). An algorithm was needed for foreground
+extraction with minimal user interaction, and the result was GrabCut.
+
+How does it work from the user's point of view? Initially the user draws a rectangle around the foreground region
+(the foreground region should be completely inside the rectangle). Then the algorithm segments it
+iteratively to get the best result. Done. But in some cases, the segmentation won't be fine; for example,
+it may have marked some foreground region as background and vice versa. In that case, the user needs to
+do fine touch-ups. Just give some strokes on the images where the faulty results are. A stroke
+basically says *"Hey, this region should be foreground, you marked it background, correct it in the next
+iteration"* or its opposite for background. Then in the next iteration, you get better results.
+
+What happens in the background?
+
+-   The user inputs the rectangle. Everything outside this rectangle will be taken as sure background
+    (that is the reason it was mentioned before that your rectangle should include all the
+    objects). Everything inside the rectangle is unknown. Similarly, any user input specifying
+    foreground and background is considered hard-labelling, which means it won't change in
+    the process.
+-   The computer does an initial labelling depending on the data we gave. It labels the foreground and
+    background pixels (or it hard-labels).
+-   Now a Gaussian Mixture Model(GMM) is used to model the foreground and background.
+-   Depending on the data we gave, the GMM learns and creates a new pixel distribution. That is, the
+    unknown pixels are labelled either probable foreground or probable background depending on their
+    relation with the other hard-labelled pixels in terms of color statistics (it is just like
+    clustering).
+-   A graph is built from this pixel distribution. Nodes in the graph are pixels. Two additional
+    nodes are added, the **Source node** and the **Sink node**. Every foreground pixel is connected to
+    the Source node and every background pixel is connected to the Sink node.
+-   The weights of edges connecting pixels to the source node/sink node are defined by the probability
+    of a pixel being foreground/background. The weights between the pixels are defined by the edge
+    information or pixel similarity. If there is a large difference in pixel color, the edge
+    between them will get a low weight.
+-   Then a mincut algorithm is used to segment the graph. It cuts the graph into two, separating the
+    source node and the sink node, with a minimum cost function. The cost function is the sum of all
+    weights of the edges that are cut. After the cut, all the pixels connected to the Source node
+    become foreground and those connected to the Sink node become background.
+-   The process is continued until the classification converges.
+
+It is illustrated in the image below (Image Courtesy: <http://www.cs.ru.ac.za/research/g02m1682/>)
+
+![image](images/grabcut_scheme.jpg)
+
+Demo
+----
+
+We use the function: **cv.grabCut (image, mask, rect, bgdModel, fgdModel, iterCount, mode = cv.GC_EVAL)**
+
+@param image      input 8-bit 3-channel image.
+@param mask       input/output 8-bit single-channel mask. The mask is initialized by the function when mode is set to GC_INIT_WITH_RECT. Its elements may have one of the cv.GrabCutClasses.
+@param rect       ROI containing a segmented object. The pixels outside of the ROI are marked as "obvious background". The parameter is only used when mode==GC_INIT_WITH_RECT.
+@param bgdModel   temporary array for the background model. Do not modify it while you are processing the same image.
+@param fgdModel   temporary array for the foreground model. Do not modify it while you are processing the same image.
+@param iterCount  number of iterations the algorithm should make before returning the result. Note that the result can be refined with further calls with mode==GC_INIT_WITH_MASK or mode==GC_EVAL .
+@param mode       operation mode that could be one of the cv::GrabCutModes
+
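+A minimal sketch (the canvas id 'canvasInput' and the rectangle coordinates are hypothetical example values):
+
+@code{.js}
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB, 0); // grabCut expects a 3-channel image
+let mask = new cv.Mat();
+let bgdModel = new cv.Mat();
+let fgdModel = new cv.Mat();
+let rect = new cv.Rect(50, 50, 260, 280); // rough bounding box of the foreground
+cv.grabCut(src, mask, rect, bgdModel, fgdModel, 1, cv.GC_INIT_WITH_RECT);
+// pixels labelled cv.GC_FGD or cv.GC_PR_FGD in mask belong to the foreground
+@endcode
+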
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_grabcut_grabCut.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_gradients/js_gradients.markdown b/doc/js_tutorials/js_imgproc/js_gradients/js_gradients.markdown
new file mode 100644 (file)
index 0000000..21e36a0
--- /dev/null
@@ -0,0 +1,100 @@
+Image Gradients {#tutorial_js_gradients}
+===============
+
+Goal
+----
+
+-   Find image gradients, edges etc.
+-   We will learn the following functions: **cv.Sobel()**, **cv.Scharr()**, **cv.Laplacian()** etc.
+
+Theory
+------
+
+OpenCV provides three types of gradient filters or High-pass filters, Sobel, Scharr and Laplacian.
+We will see each one of them.
+
+### 1. Sobel and Scharr Derivatives
+
+The Sobel operator is a joint Gaussian smoothing plus differentiation operation, so it is more
+resistant to noise. You can specify the direction of derivatives to be taken, vertical or horizontal
+(by the arguments yorder and xorder respectively). You can also specify the size of the kernel by the
+argument ksize. If ksize = -1, a 3x3 Scharr filter is used, which gives better results than a 3x3 Sobel
+filter. Please see the docs for the kernels used.
+
+We use the functions: **cv.Sobel (src, dst, ddepth, dx, dy, ksize = 3, scale = 1, delta = 0, borderType = cv.BORDER_DEFAULT)**
+@param src         input image.
+@param dst         output image of the same size and the same number of channels as src.
+@param ddepth      output image depth(see cv.combinations); in the case of 8-bit input images it will result in truncated derivatives.
+@param dx          order of the derivative x.
+@param dy          order of the derivative y.
+@param ksize       size of the extended Sobel kernel; it must be 1, 3, 5, or 7.
+@param scale       optional scale factor for the computed derivative values.
+@param delta       optional delta value that is added to the results prior to storing them in dst.
+@param borderType  pixel extrapolation method(see cv.BorderTypes).
+
+**cv.Scharr (src, dst, ddepth, dx, dy, scale = 1, delta = 0, borderType = cv.BORDER_DEFAULT)**
+@param src         input image.
+@param dst         output image of the same size and the same number of channels as src.
+@param ddepth      output image depth(see cv.combinations).
+@param dx          order of the derivative x.
+@param dy          order of the derivative y.
+@param scale       optional scale factor for the computed derivative values.
+@param delta       optional delta value that is added to the results prior to storing them in dst.
+@param borderType  pixel extrapolation method(see cv.BorderTypes).
+
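+A minimal sketch computing the x- and y-derivatives (assuming src was read from a canvas):
+
+@code{.js}
+let dstx = new cv.Mat();
+let dsty = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.Sobel(src, dstx, cv.CV_8U, 1, 0, 3, 1, 0, cv.BORDER_DEFAULT); // d/dx
+cv.Sobel(src, dsty, cv.CV_8U, 0, 1, 3, 1, 0, cv.BORDER_DEFAULT); // d/dy
+// cv.Scharr(src, dstx, cv.CV_8U, 1, 0) would use the 3x3 Scharr kernel instead
+@endcode
+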
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_gradients_Sobel.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 2. Laplacian Derivatives
+
+It calculates the Laplacian of the image given by the relation,
+\f$\Delta src = \frac{\partial ^2{src}}{\partial x^2} + \frac{\partial ^2{src}}{\partial y^2}\f$ where
+each derivative is found using Sobel derivatives. If ksize = 1, then the following kernel is used for
+filtering:
+
+\f[kernel = \begin{bmatrix} 0 & 1 & 0 \\ 1 & -4 & 1 \\ 0 & 1 & 0  \end{bmatrix}\f]
+
+We use the function: **cv.Laplacian (src, dst, ddepth, ksize = 1, scale = 1, delta = 0, borderType = cv.BORDER_DEFAULT)**
+@param src         input image.
+@param dst         output image of the same size and the same number of channels as src.
+@param ddepth      output image depth.
+@param ksize       aperture size used to compute the second-derivative filters.
+@param scale       optional scale factor for the computed Laplacian values.
+@param delta       optional delta value that is added to the results prior to storing them in dst.
+@param borderType  pixel extrapolation method(see cv.BorderTypes).
+
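+A minimal sketch (assuming src is already a grayscale image):
+
+@code{.js}
+let dst = new cv.Mat();
+cv.Laplacian(src, dst, cv.CV_8U, 1, 1, 0, cv.BORDER_DEFAULT);
+@endcode
+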
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_gradients_Laplacian.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+One Important Matter!
+---------------------
+
+In our last example, the output datatype is cv.CV_8U. But there is a slight problem with
+that. A black-to-white transition is taken as a positive slope (it has a positive value) while a
+white-to-black transition is taken as a negative slope (it has a negative value). So when you convert
+data to cv.CV_8U, all negative slopes are made zero. In simple words, you miss that edge.
+
+If you want to detect both edges, the better option is to keep the output datatype in some higher form,
+like cv.CV_16S, cv.CV_64F etc., take its absolute value and then convert back to cv.CV_8U.
+The sketch below demonstrates this procedure for a horizontal Sobel filter and the difference in results.
+
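+A minimal sketch (assuming src is already grayscale, and that cv.convertScaleAbs is exposed in your build of OpenCV.js):
+
+@code{.js}
+let dstx = new cv.Mat();
+let absDstx = new cv.Mat();
+// keep negative slopes by using a wider output type...
+cv.Sobel(src, dstx, cv.CV_64F, 1, 0, 3, 1, 0, cv.BORDER_DEFAULT);
+// ...then take the absolute value and convert back to 8-bit
+cv.convertScaleAbs(dstx, absDstx, 1, 0);
+@endcode
+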
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_gradients_absSobel.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_histograms/js_histogram_backprojection/js_histogram_backprojection.markdown b/doc/js_tutorials/js_imgproc/js_histograms/js_histogram_backprojection/js_histogram_backprojection.markdown
new file mode 100644 (file)
index 0000000..5ad9492
--- /dev/null
@@ -0,0 +1,59 @@
+Histogram - 3 : Histogram Backprojection {#tutorial_js_histogram_backprojection}
+========================================
+
+Goal
+----
+
+-   We will learn about histogram backprojection.
+
+Theory
+------
+
+It was proposed by **Michael J. Swain and Dana H. Ballard** in their paper **Indexing via color
+histograms**.
+
+**What is it actually, in simple words?** It is used for image segmentation or finding objects of
+interest in an image. In simple words, it creates an image of the same size (but single channel) as
+that of our input image, where each pixel corresponds to the probability of that pixel belonging to
+our object. In simpler words, the output image will have our object of interest in more white
+compared to the remaining part. Well, that is an intuitive explanation. (I can't make it simpler.)
+Histogram backprojection is used with the camshift algorithm, etc.
+
+**How do we do it?** We create a histogram of an image containing our object of interest (in our
+case, the ground, leaving out the player and other things). The object should fill the image as far as
+possible for better results. And a color histogram is preferred over a grayscale histogram, because the
+color of the object is a better way to define the object than its grayscale intensity. We then
+"back-project" this histogram over our test image where we need to find the object; in other
+words, we calculate the probability of every pixel belonging to the ground and show it. The
+resulting output, on proper thresholding, gives us the ground alone.
+
+Backprojection in OpenCV
+------------------------
+
+We use the functions: **cv.calcBackProject (images, channels, hist, dst, ranges, scale)**
+
+@param images       source arrays. They all should have the same depth, cv.CV_8U, cv.CV_16U or cv.CV_32F , and the same size. Each of them can have an arbitrary number of channels.
+@param channels     the list of channels used to compute the back projection. The number of channels must match the histogram dimensionality.
+@param hist         input histogram that can be dense or sparse.
+@param dst          destination back projection array that is a single-channel array of the same size and depth as images[0].
+@param ranges       array of arrays of the histogram bin boundaries in each dimension(see cv.calcHist).
+@param scale        optional scale factor for the output back projection.
+
+**cv.normalize (src, dst, alpha = 1, beta = 0, norm_type = cv.NORM_L2, dtype = -1, mask = new cv.Mat())**
+
+@param src        input array.
+@param dst        output array of the same size as src .
+@param alpha      norm value to normalize to or the lower range boundary in case of the range normalization.
+@param beta       upper range boundary in case of the range normalization; it is not used for the norm normalization.
+@param norm_type  normalization type (see cv.NormTypes).
+@param dtype      when negative, the output array has the same type as src; otherwise, it has the same number of channels as src and the depth = CV_MAT_DEPTH(dtype).
+@param mask       optional operation mask.
+
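+A minimal sketch of the whole flow, working in the hue channel of HSV (assuming canvases with the hypothetical ids 'srcCanvas' for the model image and 'dstCanvas' for the test image):
+
+@code{.js}
+let src = cv.imread('srcCanvas');   // image of the object of interest
+let dst = cv.imread('dstCanvas');   // image in which to find the object
+cv.cvtColor(src, src, cv.COLOR_RGBA2RGB, 0);
+cv.cvtColor(src, src, cv.COLOR_RGB2HSV, 0);
+cv.cvtColor(dst, dst, cv.COLOR_RGBA2RGB, 0);
+cv.cvtColor(dst, dst, cv.COLOR_RGB2HSV, 0);
+let srcVec = new cv.MatVector();
+let dstVec = new cv.MatVector();
+srcVec.push_back(src);
+dstVec.push_back(dst);
+let hist = new cv.Mat();
+let mask = new cv.Mat();
+let none = new cv.Mat();
+let backproj = new cv.Mat();
+// histogram of the hue channel of the model image, normalized to [0, 255]
+cv.calcHist(srcVec, [0], mask, hist, [180], [0, 180], false);
+cv.normalize(hist, hist, 0, 255, cv.NORM_MINMAX, -1, none);
+cv.calcBackProject(dstVec, [0], hist, backproj, [0, 180], 1);
+@endcode
+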
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_histogram_backprojection_calcBackProject.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_histograms/js_histogram_begins/js_histogram_begins.markdown b/doc/js_tutorials/js_imgproc/js_histograms/js_histogram_begins/js_histogram_begins.markdown
new file mode 100644 (file)
index 0000000..fdcadf4
--- /dev/null
@@ -0,0 +1,51 @@
+Histograms - 1 : Find, Plot, Analyze !!! {#tutorial_js_histogram_begins}
+========================================
+
+Goal
+----
+
+-   Find histograms
+-   Plot histograms
+-   You will learn the function: **cv.calcHist()**.
+
+Theory
+------
+
+So what is a histogram? You can consider a histogram as a graph or plot which gives you an overall
+idea about the intensity distribution of an image. It is a plot with pixel values (ranging from 0 to
+255, not always) on the X-axis and the corresponding number of pixels in the image on the Y-axis.
+
+It is just another way of understanding the image. By looking at the histogram of an image, you get
+intuition about the contrast, brightness, intensity distribution etc. of that image. Almost all image
+processing tools today provide histogram features. Below is an image from the [Cambridge in Color
+website](http://www.cambridgeincolour.com/tutorials/histograms1.htm), and I recommend you visit
+the site for more details.
+
+![image](histogram_sample.jpg)
+
+You can see the image and its histogram. (Remember, this histogram is drawn for a grayscale image, not a
+color image.) The left region of the histogram shows the amount of darker pixels in the image, and the right region
+shows the amount of brighter pixels. From the histogram, you can see the dark region is larger than the
+bright region, and the amount of midtones (pixel values in the mid-range, say around 127) is very small.
+
+Find Histogram
+--------------
+
+We use the function: **cv.calcHist (image, channels, mask, hist, histSize, ranges, accumulate = false)**
+
+@param image        source arrays. They all should have the same depth, cv.CV_8U, cv.CV_16U or cv.CV_32F , and the same size. Each of them can have an arbitrary number of channels.
+@param channels     list of the dims channels used to compute the histogram.
+@param mask         optional mask. If the matrix is not empty, it must be an 8-bit array of the same size as images[i] . The non-zero mask elements mark the array elements counted in the histogram.
+@param hist         output histogram (cv.CV_32F type), which is a dense or sparse dims-dimensional array.
+@param histSize     array of histogram sizes in each dimension.
+@param ranges       array of the dims arrays of the histogram bin boundaries in each dimension.
+@param accumulate   accumulation flag. If it is set, the histogram is not cleared in the beginning when it is allocated. This feature enables you to compute a single histogram from several sets of arrays, or to update the histogram in time.
+
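+A minimal sketch computing a 256-bin grayscale histogram (assuming an input canvas with the hypothetical id 'canvasInput'):
+
+@code{.js}
+let src = cv.imread('canvasInput');
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+let srcVec = new cv.MatVector();
+srcVec.push_back(src);
+let hist = new cv.Mat();
+let mask = new cv.Mat();
+cv.calcHist(srcVec, [0], mask, hist, [256], [0, 255], false);
+// hist is a 256x1 cv.CV_32F Mat; hist.data32F[i] is the count of pixels with value i
+@endcode
+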
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_histogram_begins_calcHist.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_histograms/js_histogram_equalization/js_histogram_equalization.markdown b/doc/js_tutorials/js_imgproc/js_histograms/js_histogram_equalization/js_histogram_equalization.markdown
new file mode 100644 (file)
index 0000000..332d456
--- /dev/null
@@ -0,0 +1,63 @@
+Histograms - 2: Histogram Equalization {#tutorial_js_histogram_equalization}
+======================================
+
+Goal
+----
+
+-   We will learn the concepts of histogram equalization and use it to improve the contrast of our
+    images.
+
+Theory
+------
+
+Consider an image whose pixel values are confined to some specific range of values only. For example, a
+brighter image will have all its pixels confined to high values. But a good image will have pixels from
+all regions of the image. So you need to stretch this histogram to either end (as given in the below
+image, from Wikipedia), and that is what histogram equalization does (in simple words). This normally
+improves the contrast of the image.
+
+![image](images/histogram_equalization.png)
+
+I would recommend you read the Wikipedia page on [Histogram
+Equalization](http://en.wikipedia.org/wiki/Histogram_equalization) for more details about it. It has
+a very good explanation with worked out examples, so that you will understand almost everything
+after reading it.
+
+Histograms Equalization in OpenCV
+---------------------------------
+
+We use the function: **cv.equalizeHist (src, dst)**
+
+@param src      source 8-bit single channel image.
+@param dst      destination image of the same size and type as src.
+
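+A minimal sketch (assuming an input canvas with the hypothetical id 'canvasInput'):
+
+@code{.js}
+let src = cv.imread('canvasInput');
+let dst = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0); // equalizeHist needs a single channel
+cv.equalizeHist(src, dst);
+@endcode
+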
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_histogram_equalization_equalizeHist.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+CLAHE (Contrast Limited Adaptive Histogram Equalization)
+--------------------------------------------------------
+
+In **adaptive histogram equalization**, the image is divided into small blocks called "tiles" (tileSize is 8x8 by default in OpenCV). Then each of these blocks is histogram equalized as usual. So in a small area, the histogram is confined to a small region
+(unless there is noise). If noise is there, it will be amplified. To avoid this, **contrast limiting** is applied. If any histogram bin is above the specified contrast limit (by default 40 in OpenCV), those pixels are clipped and distributed uniformly to other bins before applying histogram equalization. After equalization, to remove artifacts at tile borders, bilinear interpolation is applied.
+
+We use the class: **cv.CLAHE (clipLimit = 40, tileGridSize = new cv.Size(8, 8))**
+
+@param clipLimit      threshold for contrast limiting.
+@param tileGridSize   size of grid for histogram equalization. Input image will be divided into equally sized rectangular tiles. tileGridSize defines the number of tiles in row and column.
+
+@note Don't forget to delete CLAHE!
+
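+A minimal sketch (assuming src is already a grayscale image):
+
+@code{.js}
+let dst = new cv.Mat();
+let clahe = new cv.CLAHE(40, new cv.Size(8, 8));
+clahe.apply(src, dst);
+clahe.delete(); // don't forget to delete the CLAHE object
+@endcode
+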
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_histogram_equalization_createCLAHE.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_histograms/js_table_of_contents_histograms.markdown b/doc/js_tutorials/js_imgproc/js_histograms/js_table_of_contents_histograms.markdown
new file mode 100644 (file)
index 0000000..75656e4
--- /dev/null
@@ -0,0 +1,14 @@
+Histograms in OpenCV.js {#tutorial_js_table_of_contents_histograms}
+====================
+
+-   @subpage tutorial_js_histogram_begins
+
+    Learn to find, plot and analyze histograms
+
+-   @subpage tutorial_js_histogram_equalization
+
+    Learn to Equalize Histograms to get better contrast for images
+
+-   @subpage tutorial_js_histogram_backprojection
+
+    Learn histogram backprojection to segment colored objects
diff --git a/doc/js_tutorials/js_imgproc/js_houghcircles/js_houghcircles.markdown b/doc/js_tutorials/js_imgproc/js_houghcircles/js_houghcircles.markdown
new file mode 100644 (file)
index 0000000..a5f55c7
--- /dev/null
@@ -0,0 +1,38 @@
+Hough Circle Transform {#tutorial_js_houghcircles}
+======================
+
+Goal
+----
+
+-   We will learn to use Hough Transform to find circles in an image.
+-   We will learn these functions: **cv.HoughCircles()**
+
+Theory
+------
+
+A circle is represented mathematically as \f$(x-x_{center})^2 + (y - y_{center})^2 = r^2\f$ where
+\f$(x_{center},y_{center})\f$ is the center of the circle, and \f$r\f$ is the radius of the circle. From the
+equation, we can see we have 3 parameters, so we need a 3D accumulator for the Hough transform, which
+would be highly inefficient. So OpenCV uses a trickier method, the **Hough Gradient Method**, which
+uses the gradient information of edges.
+
+We use the function: **cv.HoughCircles (image, circles, method, dp, minDist, param1 = 100, param2 = 100, minRadius = 0, maxRadius = 0)**
+
+@param image       8-bit, single-channel, grayscale input image.
+@param circles     output vector of found circles(cv.CV_32FC3 type). Each vector is encoded as a 3-element floating-point vector (x,y,radius) .
+@param method      detection method (see cv.HoughModes). Currently, the only implemented method is HOUGH_GRADIENT.
+@param dp          inverse ratio of the accumulator resolution to the image resolution. For example, if dp = 1, the accumulator has the same resolution as the input image. If dp = 2, the accumulator has half as big width and height.
+@param minDist     minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed.
+@param param1      first method-specific parameter. In case of HOUGH_GRADIENT , it is the higher threshold of the two passed to the Canny edge detector (the lower one is twice smaller).
+@param param2      second method-specific parameter. In case of HOUGH_GRADIENT , it is the accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected. Circles, corresponding to the larger accumulator values, will be returned first.
+@param minRadius   minimum circle radius.
+@param maxRadius   maximum circle radius.
+
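+A minimal sketch that detects circles and draws them (assuming an input canvas with the hypothetical id 'canvasInput'; the parameter values are example choices):
+
+@code{.js}
+let src = cv.imread('canvasInput');
+let dst = src.clone();
+let circles = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.HoughCircles(src, circles, cv.HOUGH_GRADIENT, 1, 45, 75, 40, 0, 0);
+// each detected circle is stored as (x, y, radius)
+for (let i = 0; i < circles.cols; ++i) {
+    let x = circles.data32F[i * 3];
+    let y = circles.data32F[i * 3 + 1];
+    let radius = circles.data32F[i * 3 + 2];
+    cv.circle(dst, new cv.Point(x, y), radius, new cv.Scalar(255, 0, 0, 255));
+}
+@endcode
+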
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_houghcircles_HoughCirclesP.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_houghlines/js_houghlines.markdown b/doc/js_tutorials/js_imgproc/js_houghlines/js_houghlines.markdown
new file mode 100644 (file)
index 0000000..3f78775
--- /dev/null
@@ -0,0 +1,119 @@
+Hough Line Transform {#tutorial_js_houghlines}
+====================
+
+Goal
+----
+
+-   We will understand the concept of the Hough Transform.
+-   We will learn how to use it to detect lines in an image.
+-   We will learn the following functions: **cv.HoughLines()**, **cv.HoughLinesP()**
+
+Theory
+------
+
+The Hough Transform is a popular technique to detect any shape, if you can represent that shape in a
+mathematical form. It can detect the shape even if it is broken or distorted a little bit. We will
+see how it works for a line.
+
+A line can be represented as \f$y = mx+c\f$ or in a parametric form, as
+\f$\rho = x \cos \theta + y \sin \theta\f$ where \f$\rho\f$ is the perpendicular distance from the origin to the
+line, and \f$\theta\f$ is the angle formed by this perpendicular line and the horizontal axis, measured
+counter-clockwise (the direction varies with how you represent the coordinate system. This
+representation is used in OpenCV). Check the image below:
+
+![image](images/houghlines1.svg)
+
+So if the line is passing below the origin, it will have a positive rho and an angle less than 180. If it
+is going above the origin, instead of taking an angle greater than 180, the angle is taken less than 180,
+and rho is taken negative. Any vertical line will have 0 degrees and horizontal lines will have 90
+degrees.
+
+Now let's see how the Hough Transform works for lines. Any line can be represented in these two terms,
+\f$(\rho, \theta)\f$. So first it creates a 2D array or accumulator (to hold the values of the two parameters)
+and it is set to 0 initially. Let rows denote the \f$\rho\f$ and columns denote the \f$\theta\f$. Size of
+array depends on the accuracy you need. Suppose you want the accuracy of angles to be 1 degree, you will
+need 180 columns. For \f$\rho\f$, the maximum distance possible is the diagonal length of the image. So
+taking one pixel accuracy, the number of rows can be the diagonal length of the image.
+
+Consider a 100x100 image with a horizontal line at the middle. Take the first point of the line. You
+know its (x,y) values. Now in the line equation, put the values \f$\theta = 0,1,2,....,180\f$ and check
+the \f$\rho\f$ you get. For every \f$(\rho, \theta)\f$ pair, you increment value by one in our accumulator
+in its corresponding \f$(\rho, \theta)\f$ cells. So now in accumulator, the cell (50,90) = 1 along with
+some other cells.
+
+Now take the second point on the line. Do the same as above. Increment the values in the cells
+corresponding to \f$(\rho, \theta)\f$ you got. This time, the cell (50,90) = 2. What you actually
+do is voting the \f$(\rho, \theta)\f$ values. You continue this process for every point on the line. At
+each point, the cell (50,90) will be incremented or voted up, while other cells may or may not be
+voted up. This way, at the end, the cell (50,90) will have maximum votes. So if you search the
+accumulator for maximum votes, you get the value (50,90) which says, there is a line in this image
+at a distance 50 from the origin and at angle 90 degrees. It is well shown in the below animation (Image
+Courtesy: [Amos Storkey](http://homepages.inf.ed.ac.uk/amos/hough.html) )
+
+![](houghlinesdemo.gif)
+
+This is how the Hough transform works for lines. It is simple. Below is an image which shows the accumulator. Bright spots at some locations
+denote they are the parameters of possible lines in the image. (Image courtesy: [Wikipedia](http://en.wikipedia.org/wiki/Hough_transform) )
+
+![](houghlines2.jpg)
+
+Hough Transform in OpenCV
+=========================
+
+Everything explained above is encapsulated in the OpenCV function **cv.HoughLines()**. It simply returns an array of \f$(\rho, \theta)\f$ values, where \f$\rho\f$ is measured in pixels and \f$\theta\f$ is measured in radians. The
+input image should be a binary image, so apply a threshold or use Canny edge detection before
+applying the Hough transform.
+
+We use the function: **cv.HoughLines (image, lines, rho, theta, threshold, srn = 0, stn = 0, min_theta = 0, max_theta = Math.PI)**
+@param image       8-bit, single-channel binary source image. The image may be modified by the function.
+@param lines       output vector of lines (cv.CV_32FC2 type). Each line is represented by a two-element vector (ρ,θ). ρ is the distance from the coordinate origin (0,0). θ is the line rotation angle in radians.
+@param rho         distance resolution of the accumulator in pixels.
+@param theta       angle resolution of the accumulator in radians.
+@param threshold   accumulator threshold parameter. Only those lines are returned that get enough votes.
+@param srn         for the multi-scale Hough transform, it is a divisor for the distance resolution rho . The coarse accumulator distance resolution is rho and the accurate accumulator resolution is rho/srn . If both srn=0 and stn=0 , the classical Hough transform is used. Otherwise, both these parameters should be positive.
+@param stn         for the multi-scale Hough transform, it is a divisor for the distance resolution theta.
+@param min_theta   for standard and multi-scale Hough transform, minimum angle to check for lines. Must fall between 0 and max_theta.
+@param max_theta   for standard and multi-scale Hough transform, maximum angle to check for lines. Must fall between min_theta and CV_PI.
+
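+A minimal sketch that detects lines after Canny edge detection and draws them (assuming an input canvas with the hypothetical id 'canvasInput'):
+
+@code{.js}
+let src = cv.imread('canvasInput');
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+let lines = new cv.Mat();
+cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0);
+cv.Canny(src, src, 50, 200, 3);
+cv.HoughLines(src, lines, 1, Math.PI / 180, 30, 0, 0, 0, Math.PI);
+// each detected line is a (rho, theta) pair; convert it to two end points
+for (let i = 0; i < lines.rows; ++i) {
+    let rho = lines.data32F[i * 2];
+    let theta = lines.data32F[i * 2 + 1];
+    let a = Math.cos(theta);
+    let b = Math.sin(theta);
+    let x0 = a * rho;
+    let y0 = b * rho;
+    let startPoint = {x: x0 - 1000 * b, y: y0 + 1000 * a};
+    let endPoint = {x: x0 + 1000 * b, y: y0 - 1000 * a};
+    cv.line(dst, startPoint, endPoint, new cv.Scalar(255, 0, 0));
+}
+@endcode
+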
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_houghlines_HoughLines.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Probabilistic Hough Transform
+-----------------------------
+
+In the Hough transform, you can see that even for a line with two parameters, it takes a lot of
+computation. The Probabilistic Hough Transform is an optimization of the Hough Transform we saw. It doesn't
+take all the points into consideration. Instead, it takes only a random subset of points which is
+sufficient for line detection. We just have to decrease the threshold. See the image below, which compares the
+Hough Transform and the Probabilistic Hough Transform in Hough space. (Image Courtesy :
+[Franck Bettinger's home page](http://phdfb1.free.fr/robot/mscthesis/node14.html) )
+
+![image](images/houghlines4.png)
+
+OpenCV implementation is based on Robust Detection of Lines Using the Progressive Probabilistic
+Hough Transform by Matas, J. and Galambos, C. and Kittler, J.V. @cite Matas00.
+
+We use the function: **cv.HoughLinesP (image, lines, rho, theta, threshold, minLineLength = 0, maxLineGap = 0)**
+
+@param image          8-bit, single-channel binary source image. The image may be modified by the function.
+@param lines          output vector of lines (cv.CV_32SC4 type). Each line is represented by a 4-element vector (x1,y1,x2,y2), where (x1,y1) and (x2,y2) are the ending points of each detected line segment.
+@param rho            distance resolution of the accumulator in pixels.
+@param theta          angle resolution of the accumulator in radians.
+@param threshold      accumulator threshold parameter. Only those lines are returned that get enough votes.
+@param minLineLength  minimum line length. Line segments shorter than that are rejected.
+@param maxLineGap     maximum allowed gap between points on the same line to link them.
+
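+A minimal sketch (assuming src has already been converted to a binary edge image as in the previous example):
+
+@code{.js}
+let dst = cv.Mat.zeros(src.rows, src.cols, cv.CV_8UC3);
+let lines = new cv.Mat();
+cv.HoughLinesP(src, lines, 1, Math.PI / 180, 2, 0, 0);
+// each detected segment is stored as (x1, y1, x2, y2)
+for (let i = 0; i < lines.rows; ++i) {
+    let startPoint = {x: lines.data32S[i * 4], y: lines.data32S[i * 4 + 1]};
+    let endPoint = {x: lines.data32S[i * 4 + 2], y: lines.data32S[i * 4 + 3]};
+    cv.line(dst, startPoint, endPoint, new cv.Scalar(255, 0, 0));
+}
+@endcode
+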
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_houghlines_HoughLinesP.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_imgproc_camera/js_imgproc_camera.markdown b/doc/js_tutorials/js_imgproc/js_imgproc_camera/js_imgproc_camera.markdown
new file mode 100644 (file)
index 0000000..cbda5b0
--- /dev/null
@@ -0,0 +1,14 @@
+Image Processing for Video Capture {#tutorial_js_imgproc_camera}
+==================================
+
+Goal
+----
+
+-   Learn image processing for video capture.
+
+
+\htmlonly
+<iframe src="../../js_imgproc_camera.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_morphological_ops/js_morphological_ops.markdown b/doc/js_tutorials/js_imgproc/js_morphological_ops/js_morphological_ops.markdown
new file mode 100644 (file)
index 0000000..b5e10e0
--- /dev/null
@@ -0,0 +1,177 @@
+Morphological Transformations {#tutorial_js_morphological_ops}
+=============================
+
+Goal
+----
+
+-   We will learn different morphological operations like Erosion, Dilation, Opening, Closing etc.
+-   We will learn different functions like: **cv.erode()**, **cv.dilate()**, **cv.morphologyEx()** etc.
+
+Theory
+------
+
+Morphological transformations are some simple operations based on the image shape. They are normally
+performed on binary images. They need two inputs: one is our original image, the second one is called the
+**structuring element** or **kernel**, which decides the nature of the operation. Two basic morphological
+operators are Erosion and Dilation. Then its variant forms like Opening, Closing, Gradient etc. also
+come into play. We will see them one-by-one with the help of the following image:
+
+![image](shape.jpg)
+
+### 1. Erosion
+
+The basic idea of erosion is just like soil erosion: it erodes away the boundaries of the
+foreground object (always try to keep the foreground in white). So what does it do? The kernel slides
+through the image (as in 2D convolution). A pixel in the original image (either 1 or 0) will be
+considered 1 only if all the pixels under the kernel are 1; otherwise it is eroded (made zero).
+
+So what happens is that all the pixels near the boundary will be discarded depending upon the size of
+the kernel. So the thickness or size of the foreground object decreases, or simply the white region decreases
+in the image. It is useful for removing small white noise (as we have seen in the colorspace chapter),
+detaching two connected objects, etc.
+
+We use the function: **cv.erode (src, dst, kernel, anchor = new cv.Point(-1, -1), iterations = 1, borderType = cv.BORDER_CONSTANT, borderValue = cv.morphologyDefaultBorderValue())**
+@param src          input image; the number of channels can be arbitrary, but the depth should be one of cv.CV_8U, cv.CV_16U, cv.CV_16S, cv.CV_32F or cv.CV_64F.
+@param dst          output image of the same size and type as src.
+@param kernel       structuring element used for erosion.
+@param anchor       position of the anchor within the element; default value new cv.Point(-1, -1) means that the anchor is at the element center.
+@param iterations   number of times erosion is applied.
+@param borderType   pixel extrapolation method(see cv.BorderTypes).
+@param borderValue  border value in case of a constant border
+
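+A minimal sketch using a 5x5 rectangular kernel (assuming src is already a binary image):
+
+@code{.js}
+let dst = new cv.Mat();
+let M = cv.Mat.ones(5, 5, cv.CV_8U);
+cv.erode(src, dst, M, new cv.Point(-1, -1), 1, cv.BORDER_CONSTANT,
+         cv.morphologyDefaultBorderValue());
+@endcode
+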
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_morphological_ops_erode.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 2. Dilation
+
+It is just the opposite of erosion. Here, a pixel element is '1' if at least one pixel under the kernel
+is '1'. So it increases the white region in the image, or the size of the foreground object increases.
+Normally, in cases like noise removal, erosion is followed by dilation. Because erosion removes
+white noise but also shrinks our object, we dilate it. Since the noise is gone, it won't come
+back, but our object area increases. It is also useful in joining broken parts of an object.
+
+We use the function: **cv.dilate (src, dst, kernel, anchor = new cv.Point(-1, -1), iterations = 1, borderType = cv.BORDER_CONSTANT, borderValue = cv.morphologyDefaultBorderValue())**
+@param src          input image; the number of channels can be arbitrary, but the depth should be one of cv.CV_8U, cv.CV_16U, cv.CV_16S, cv.CV_32F or cv.CV_64F.
+@param dst          output image of the same size and type as src.
+@param kernel       structuring element used for dilation.
+@param anchor       position of the anchor within the element; default value new cv.Point(-1, -1) means that the anchor is at the element center.
+@param iterations   number of times dilation is applied.
+@param borderType   pixel extrapolation method(see cv.BorderTypes).
+@param borderValue  border value in case of a constant border
+
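+A minimal sketch (same setup as the erosion example above):
+
+@code{.js}
+let dst = new cv.Mat();
+let M = cv.Mat.ones(5, 5, cv.CV_8U);
+cv.dilate(src, dst, M, new cv.Point(-1, -1), 1, cv.BORDER_CONSTANT,
+          cv.morphologyDefaultBorderValue());
+@endcode
+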
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_morphological_ops_dilate.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 3. Opening
+
+Opening is just another name for **erosion followed by dilation**. It is useful in removing noise.
+
+We use the function: **cv.morphologyEx (src, dst, op, kernel, anchor = new cv.Point(-1, -1), iterations = 1, borderType = cv.BORDER_CONSTANT, borderValue = cv.morphologyDefaultBorderValue())**
+@param src          source image. The number of channels can be arbitrary. The depth should be one of cv.CV_8U, cv.CV_16U, cv.CV_16S, cv.CV_32F or cv.CV_64F.
+@param dst          destination image of the same size and type as the source image.
+@param op           type of a morphological operation (see cv.MorphTypes).
+@param kernel       structuring element. It can be created using cv.getStructuringElement.
+@param anchor       anchor position within the kernel. Negative values mean that the anchor is at the kernel center.
+@param iterations   number of times erosion and dilation are applied.
+@param borderType   pixel extrapolation method (see cv.BorderTypes).
+@param borderValue  border value in case of a constant border. The default value has a special meaning.
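+
+A minimal sketch of opening (assuming `src` is an existing noisy binary cv.Mat):
+@code{.js}
+let dst = new cv.Mat();
+let M = cv.Mat.ones(5, 5, cv.CV_8U);
+// MORPH_OPEN = erosion followed by dilation: removes small white noise
+cv.morphologyEx(src, dst, cv.MORPH_OPEN, M);
+M.delete(); dst.delete();
+@endcode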
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_morphological_ops_opening.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 4. Closing
+
+Closing is the reverse of Opening: **dilation followed by erosion**. It is useful for closing small holes
+inside the foreground objects, or small black points on the object; see the sketch below.
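+
+A minimal sketch using the same cv.morphologyEx function with cv.MORPH_CLOSE (assuming `src` is an existing binary cv.Mat):
+@code{.js}
+let dst = new cv.Mat();
+let M = cv.Mat.ones(5, 5, cv.CV_8U);
+// MORPH_CLOSE = dilation followed by erosion: fills small holes in the foreground
+cv.morphologyEx(src, dst, cv.MORPH_CLOSE, M);
+M.delete(); dst.delete();
+@endcode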
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_morphological_ops_closing.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 5. Morphological Gradient
+
+It is the difference between dilation and erosion of an image.
+
+The result will look like the outline of the object.
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_morphological_ops_gradient.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 6. Top Hat
+
+It is the difference between the input image and the opening of the image.
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_morphological_ops_topHat.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+### 7. Black Hat
+
+It is the difference between the closing of the input image and the input image.
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_morphological_ops_blackHat.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Structuring Element
+-------------------
+
+We manually created structuring elements in the previous examples with the help of cv.Mat.ones. They are
+rectangular. But in some cases, you may need elliptical or circular kernels. For this
+purpose, OpenCV has the function **cv.getStructuringElement()**. You just pass the shape and size of
+the kernel, and you get the desired kernel.
+
+We use the function: **cv.getStructuringElement (shape, ksize, anchor = new cv.Point(-1, -1))**
+@param shape          element shape that could be one of cv.MorphShapes.
+@param ksize          size of the structuring element.
+@param anchor         anchor position within the element. The default value [-1, -1] means that the anchor is at the center. Note that only the shape of a cross-shaped element depends on the anchor position. In other cases the anchor just regulates how much the result of the morphological operation is shifted.
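+
+A minimal sketch of building and using an elliptical kernel (assuming `src` is an existing binary cv.Mat):
+@code{.js}
+// 5x5 elliptical structuring element instead of the rectangular cv.Mat.ones
+let M = cv.getStructuringElement(cv.MORPH_ELLIPSE, new cv.Size(5, 5));
+let dst = new cv.Mat();
+cv.erode(src, dst, M);
+M.delete(); dst.delete();
+@endcode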
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_morphological_ops_getStructuringElement.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_pyramids/js_pyramids.markdown b/doc/js_tutorials/js_imgproc/js_pyramids/js_pyramids.markdown
new file mode 100644 (file)
index 0000000..979fae5
--- /dev/null
@@ -0,0 +1,70 @@
+Image Pyramids {#tutorial_js_pyramids}
+==============
+
+Goal
+----
+
+-   We will learn about Image Pyramids
+-   We will learn these functions: **cv.pyrUp()**, **cv.pyrDown()**
+
+Theory
+------
+
+Normally, we work with an image of constant size. But on some occasions, we need to work
+with images of different resolutions of the same image. For example, while searching for something in
+an image, like a face, we are not sure at what size the object will be present in the image. In that
+case, we need to create a set of images with different resolutions and search for the object in all
+of them. This set of images with different resolutions is called an Image Pyramid (because when
+they are kept in a stack with the biggest image at the bottom and the smallest image at the top, it
+looks like a pyramid).
+
+There are two kinds of Image Pyramids: 1) the Gaussian Pyramid and 2) the Laplacian Pyramid.
+
+A higher level (lower resolution) in a Gaussian Pyramid is formed by removing consecutive rows and
+columns from the lower level (higher resolution) image. Each pixel in the higher level is formed by the
+contribution of 5 pixels in the underlying level with Gaussian weights. By doing so, an \f$M \times N\f$
+image becomes an \f$M/2 \times N/2\f$ image, so the area reduces to one-fourth of the original area. This
+is called an Octave. The same pattern continues as we go higher in the pyramid (i.e., the resolution
+decreases). Similarly, while expanding, the area becomes 4 times larger at each level. We can build
+Gaussian pyramids using the **cv.pyrDown()** and **cv.pyrUp()** functions.
+
+Laplacian Pyramids are formed from the Gaussian Pyramids. There is no exclusive function for that.
+Laplacian pyramid images are like edge images: most of their elements are zero. They are used in
+image compression. A level in a Laplacian Pyramid is formed by the difference between that level in
+the Gaussian Pyramid and the expanded version of its upper level in the Gaussian Pyramid.
+
+Downsample
+------
+
+We use the function: **cv.pyrDown (src, dst, dstsize = new cv.Size(0, 0), borderType  = cv.BORDER_DEFAULT)**
+@param src         input image.
+@param dst         output image; it has the specified size and the same type as src.
+@param dstsize     size of the output image.
+@param borderType  pixel extrapolation method (see cv.BorderTypes; cv.BORDER_CONSTANT isn't supported).
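+
+A minimal sketch (assuming `src` is an existing cv.Mat):
+@code{.js}
+let dst = new cv.Mat();
+cv.pyrDown(src, dst);   // dst is roughly (src.cols/2) x (src.rows/2)
+dst.delete();
+@endcode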
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_pyramids_pyrDown.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Upsample
+------
+
+We use the function: **cv.pyrUp (src, dst, dstsize = new cv.Size(0, 0), borderType  = cv.BORDER_DEFAULT)**
+@param src         input image.
+@param dst         output image; it has the specified size and the same type as src.
+@param dstsize     size of the output image.
+@param borderType  pixel extrapolation method (see cv.BorderTypes; only cv.BORDER_DEFAULT is supported).
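+
+A minimal sketch that also shows how one Laplacian level is formed, as described in the theory above (assuming `src` is an existing cv.Mat whose width and height are even, so the sizes match after the round trip):
+@code{.js}
+let down = new cv.Mat(), up = new cv.Mat(), lap = new cv.Mat();
+cv.pyrDown(src, down);      // one Gaussian pyramid level down
+cv.pyrUp(down, up);         // expand back to the original size
+cv.subtract(src, up, lap);  // Laplacian level: the detail lost by downsampling
+down.delete(); up.delete(); lap.delete();
+@endcode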
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_pyramids_pyrUp.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_table_of_contents_imgproc.markdown b/doc/js_tutorials/js_imgproc/js_table_of_contents_imgproc.markdown
new file mode 100644 (file)
index 0000000..3bb809b
--- /dev/null
@@ -0,0 +1,79 @@
+Image Processing {#tutorial_js_table_of_contents_imgproc}
+==========================
+
+-   @subpage tutorial_js_colorspaces
+
+    Learn how to change images between different color spaces.
+
+-   @subpage tutorial_js_geometric_transformations
+
+    Learn how to apply different geometric transformations to images like rotation, translation etc.
+
+-   @subpage tutorial_js_thresholding
+
+    Learn how to convert images to binary images using global thresholding, adaptive thresholding,
+    Otsu's binarization, etc.
+
+-   @subpage tutorial_js_filtering
+
+    Learn how to blur images, filter images with custom kernels, etc.
+
+-   @subpage tutorial_js_morphological_ops
+
+    Learn about morphological transformations like Erosion, Dilation, Opening, Closing etc.
+
+-   @subpage tutorial_js_gradients
+
+    Learn how to find image gradients, edges, etc.
+
+-   @subpage tutorial_js_canny
+
+    Learn how to find edges with Canny Edge Detection.
+
+-   @subpage tutorial_js_pyramids
+
+    Learn about image pyramids and how to use them for image blending.
+
+-   @subpage tutorial_js_table_of_contents_contours
+
+    Learn about Contours in OpenCV.js.
+
+-   @subpage tutorial_js_table_of_contents_histograms
+
+    Learn about histograms in OpenCV.js.
+
+-   @subpage tutorial_js_table_of_contents_transforms
+
+    Learn different Image Transforms in OpenCV.js like the Fourier Transform, Cosine Transform, etc.
+
+-   @subpage tutorial_js_template_matching
+
+    Learn how to search for an object in an image using Template Matching.
+
+-   @subpage tutorial_js_houghlines
+
+    Learn how to detect lines in an image.
+
+-   @subpage tutorial_js_houghcircles
+
+    Learn how to detect circles in an image.
+
+-   @subpage tutorial_js_watershed
+
+    Learn how to segment images with watershed segmentation.
+
+-   @subpage tutorial_js_grabcut
+
+    Learn how to extract foreground with GrabCut algorithm.
+
+-   @subpage tutorial_js_imgproc_camera
+
+    Learn image processing for video capture.
diff --git a/doc/js_tutorials/js_imgproc/js_template_matching/js_template_matching.markdown b/doc/js_tutorials/js_imgproc/js_template_matching/js_template_matching.markdown
new file mode 100644 (file)
index 0000000..21c8a78
--- /dev/null
@@ -0,0 +1,45 @@
+Template Matching {#tutorial_js_template_matching}
+=================
+
+Goals
+-----
+
+-   To find objects in an image using Template Matching
+-   You will learn these functions : **cv.matchTemplate()**, **cv.minMaxLoc()**
+
+Theory
+------
+
+Template Matching is a method for searching and finding the location of a template image in a larger
+image. OpenCV comes with the function **cv.matchTemplate()** for this purpose. It simply slides the
+template image over the input image (as in 2D convolution) and compares the template with the patch of
+the input image under it. Several comparison methods are implemented in OpenCV (you can
+check the docs for more details). It returns a grayscale image, where each pixel denotes how well
+the neighbourhood of that pixel matches the template.
+
+If the input image is of size (WxH) and the template image is of size (wxh), the output image will have
+a size of (W-w+1, H-h+1). Once you have the result, you can use the **cv.minMaxLoc()** function to find
+where the maximum/minimum value is. Take it as the top-left corner of a rectangle and take (w,h) as the
+width and height of the rectangle. That rectangle is the matched region of the template.
+
+@note If you are using cv.TM_SQDIFF as the comparison method, the minimum value gives the best match.
+
+Template Matching in OpenCV
+---------------------------
+
+We use the function: **cv.matchTemplate (image, templ, result, method, mask = new cv.Mat())**
+
+@param image      image where the search is running. It must be 8-bit or 32-bit floating-point.
+@param templ      searched template. It must not be greater than the source image and must have the same data type.
+@param result     map of comparison results. It must be single-channel 32-bit floating-point.
+@param method     parameter specifying the comparison method (see cv.TemplateMatchModes).
+@param mask       mask of the searched template. It must have the same data type and size as templ. It is not set by default.
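+
+A minimal sketch of locating the best match and drawing it (assuming `src` and `templ` are existing cv.Mat images; the names are illustrative):
+@code{.js}
+let result = new cv.Mat();
+let mask = new cv.Mat();
+cv.matchTemplate(src, templ, result, cv.TM_CCOEFF_NORMED, mask);
+let minMax = cv.minMaxLoc(result, mask);
+let topLeft = minMax.maxLoc;   // for cv.TM_SQDIFF use minMax.minLoc instead
+let bottomRight = new cv.Point(topLeft.x + templ.cols, topLeft.y + templ.rows);
+cv.rectangle(src, topLeft, bottomRight, new cv.Scalar(255, 0, 0, 255), 2);
+result.delete(); mask.delete();
+@endcode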
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_template_matching_matchTemplate.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_thresholding/js_thresholding.markdown b/doc/js_tutorials/js_imgproc/js_thresholding/js_thresholding.markdown
new file mode 100644 (file)
index 0000000..0ee6aba
--- /dev/null
@@ -0,0 +1,74 @@
+Image Thresholding {#tutorial_js_thresholding}
+==================
+
+Goal
+----
+
+-   In this tutorial, you will learn Simple thresholding, Adaptive thresholding, Otsu's thresholding
+    etc.
+-   You will learn these functions : **cv.threshold**, **cv.adaptiveThreshold** etc.
+
+Simple Thresholding
+-------------------
+
+Here, the matter is straightforward: if the pixel value is greater than a threshold value, it is
+assigned one value (maybe white); otherwise it is assigned another value (maybe black).
+
+We use the function: **cv.threshold (src, dst, thresh, maxval, type)**
+@param src    input array.
+@param dst    output array of the same size and type and the same number of channels as src.
+@param thresh threshold value.
+@param maxval maximum value to use with the cv.THRESH_BINARY and cv.THRESH_BINARY_INV thresholding types.
+@param type   thresholding type (see cv.ThresholdTypes).
+
+**thresholding type** - OpenCV provides different styles of thresholding, decided
+by the `type` parameter of the function. The different types are:
+
+-   cv.THRESH_BINARY
+-   cv.THRESH_BINARY_INV
+-   cv.THRESH_TRUNC
+-   cv.THRESH_TOZERO
+-   cv.THRESH_OTSU
+-   cv.THRESH_TRIANGLE
+
+@note The input image should be single-channel when the cv.THRESH_OTSU or cv.THRESH_TRIANGLE flag is used.
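+
+A minimal sketch (assuming `src` is an existing grayscale cv.Mat):
+@code{.js}
+let dst = new cv.Mat();
+// pixels greater than 127 become 255, the rest become 0
+cv.threshold(src, dst, 127, 255, cv.THRESH_BINARY);
+dst.delete();
+@endcode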
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_thresholding_threshold.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Adaptive Thresholding
+---------------------
+
+In the previous section, we used a global value as the threshold. But this may not be good in all
+conditions, e.g. where the image has different lighting in different areas. In that case, we go
+for adaptive thresholding. Here, the algorithm calculates the threshold for small regions of the
+image. So we get different thresholds for different regions of the same image, which gives better
+results for images with varying illumination.
+
+We use the function: **cv.adaptiveThreshold (src, dst, maxValue, adaptiveMethod, thresholdType, blockSize, C)**
+@param src             source 8-bit single-channel image.
+@param dst             destination image of the same size and the same type as src.
+@param maxValue        non-zero value assigned to the pixels for which the condition is satisfied.
+@param adaptiveMethod  adaptive thresholding algorithm to use.
+@param thresholdType   thresholding type that must be either cv.THRESH_BINARY or cv.THRESH_BINARY_INV.
+@param blockSize       size of the pixel neighborhood that is used to calculate a threshold value for the pixel: 3, 5, 7, and so on.
+@param C               constant subtracted from the mean or weighted mean. Normally, it is positive but may be zero or negative as well.
+
+**adaptiveMethod** - decides how the threshold value is calculated:
+    -   cv.ADAPTIVE_THRESH_MEAN_C
+    -   cv.ADAPTIVE_THRESH_GAUSSIAN_C
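+
+A minimal sketch (assuming `src` is an existing 8-bit single-channel cv.Mat):
+@code{.js}
+let dst = new cv.Mat();
+// threshold for each pixel = Gaussian-weighted mean of its 11x11 neighborhood, minus 2
+cv.adaptiveThreshold(src, dst, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, 11, 2);
+dst.delete();
+@endcode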
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_thresholding_adaptiveThreshold.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_transforms/js_fourier_transform/js_fourier_transform.markdown b/doc/js_tutorials/js_imgproc/js_transforms/js_fourier_transform/js_fourier_transform.markdown
new file mode 100644 (file)
index 0000000..9b773c9
--- /dev/null
@@ -0,0 +1,89 @@
+Fourier Transform {#tutorial_js_fourier_transform}
+=================
+
+Goal
+----
+
+-   To find the Fourier Transform of images using OpenCV
+-   Some applications of Fourier Transform
+-   We will learn the following functions: **cv.dft()**, etc.
+
+Theory
+------
+
+Fourier Transform is used to analyze the frequency characteristics of various filters. For images,
+**2D Discrete Fourier Transform (DFT)** is used to find the frequency domain. A fast algorithm
+called **Fast Fourier Transform (FFT)** is used for calculation of DFT. Details about these can be
+found in any image processing or signal processing textbooks.
+
+For a sinusoidal signal \f$x(t) = A \sin(2 \pi f t)\f$, we can say \f$f\f$ is the frequency of the signal,
+and if we take its frequency domain, we can see a spike at \f$f\f$. If the signal is sampled to form a
+discrete signal, we get the same frequency domain, but it is periodic in the range \f$[- \pi, \pi]\f$ or
+\f$[0, 2\pi]\f$ (or \f$[0, N]\f$ for an N-point DFT). You can consider an image as a signal which is
+sampled in two directions. So taking the Fourier transform in both the X and Y directions gives you the
+frequency representation of the image.
+
+More intuitively, for the sinusoidal signal, if the amplitude varies very fast in a short time, you can
+say it is a high-frequency signal. If it varies slowly, it is a low-frequency signal. You can extend
+the same idea to images. Where does the amplitude vary drastically in images? At edge points
+and noise. So we can say that edges and noise are high-frequency content in an image. If there are no
+large changes in amplitude, it is a low-frequency component.
+
+The performance of the DFT calculation is better for some array sizes. It is fastest when the array
+size is a power of two. Arrays whose size is a product of 2's, 3's, and 5's are also processed quite
+efficiently. So if you are worried about the performance of your code, you can pad the array with zeros
+to an optimal size before finding the DFT. OpenCV provides the function **cv.getOptimalDFTSize()** for this.
+
+Now we will see how to find the Fourier Transform.
+
+Fourier Transform in OpenCV
+---------------------------
+
+We use the functions: **cv.dft (src, dst, flags = 0, nonzeroRows = 0)**
+
+@param src           input array that could be real or complex.
+@param dst           output array whose size and type depend on the flags.
+@param flags         transformation flags, representing a combination of cv.DftFlags values.
+@param nonzeroRows   when the parameter is not zero, the function assumes that only the first nonzeroRows rows of the input array (if DFT_INVERSE is not set) or only the first nonzeroRows rows of the output array (if DFT_INVERSE is set) contain non-zeros; thus, the function can handle the rest of the rows more efficiently and save some time. This technique is very useful for calculating array cross-correlation or convolution using the DFT.
+
+**cv.getOptimalDFTSize (vecsize)**
+
+@param vecsize   vector size.
+
+**cv.copyMakeBorder (src, dst, top, bottom, left, right, borderType, value = new cv.Scalar())**
+
+@param src           source image.
+@param dst           destination image of the same type as src and of size cv.Size(src.cols + left + right, src.rows + top + bottom).
+@param top           number of pixels to extrapolate at the top of the source image.
+@param bottom        number of pixels to extrapolate at the bottom of the source image.
+@param left          number of pixels to extrapolate on the left of the source image.
+@param right         number of pixels to extrapolate on the right of the source image.
+@param borderType    border type.
+@param value         border value if borderType == cv.BORDER_CONSTANT.
+
+**cv.magnitude (x, y, magnitude)**
+
+@param x          floating-point array of x-coordinates of the vectors.
+@param y          floating-point array of y-coordinates of the vectors; it must have the same size as x.
+@param magnitude  output array of the same size and type as x.
+
+**cv.split (m, mv)**
+
+@param m     input multi-channel array.
+@param mv    output vector of arrays; the arrays themselves are reallocated, if needed.
+
+**cv.merge (mv, dst)**
+
+@param mv      input vector of matrices to be merged; all the matrices in mv must have the same size and the same depth.
+@param dst     output array of the same size and the same depth as mv[0]; the number of channels will be the total number of channels in the matrix array.
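+
+A minimal sketch that ties these functions together to compute the magnitude spectrum (assuming `src` is an existing 8-bit grayscale cv.Mat):
+@code{.js}
+// pad to an optimal DFT size
+let rows = cv.getOptimalDFTSize(src.rows);
+let cols = cv.getOptimalDFTSize(src.cols);
+let padded = new cv.Mat();
+cv.copyMakeBorder(src, padded, 0, rows - src.rows, 0, cols - src.cols,
+                  cv.BORDER_CONSTANT, new cv.Scalar(0));
+// build a 2-channel (real, imaginary) input and take the DFT
+let plane0 = new cv.Mat();
+padded.convertTo(plane0, cv.CV_32F);
+let planes = new cv.MatVector();
+planes.push_back(plane0);
+planes.push_back(cv.Mat.zeros(padded.rows, padded.cols, cv.CV_32F));
+let complexI = new cv.Mat();
+cv.merge(planes, complexI);
+cv.dft(complexI, complexI);
+// magnitude of the complex result
+cv.split(complexI, planes);
+let mag = new cv.Mat();
+cv.magnitude(planes.get(0), planes.get(1), mag);
+padded.delete(); plane0.delete(); planes.delete(); complexI.delete(); mag.delete();
+@endcode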
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_fourier_transform_dft.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_imgproc/js_transforms/js_table_of_contents_transforms.markdown b/doc/js_tutorials/js_imgproc/js_transforms/js_table_of_contents_transforms.markdown
new file mode 100644 (file)
index 0000000..2ed1424
--- /dev/null
@@ -0,0 +1,5 @@
+Image Transforms in OpenCV.js {#tutorial_js_table_of_contents_transforms}
+==========================
+
+-   @subpage tutorial_js_fourier_transform
+
+    Learn to find the Fourier Transform of images.
diff --git a/doc/js_tutorials/js_imgproc/js_watershed/js_watershed.markdown b/doc/js_tutorials/js_imgproc/js_watershed/js_watershed.markdown
new file mode 100644 (file)
index 0000000..1554744
--- /dev/null
@@ -0,0 +1,144 @@
+Image Segmentation with Watershed Algorithm {#tutorial_js_watershed}
+===========================================
+
+Goal
+----
+
+-   We will learn how to use marker-based image segmentation using watershed algorithm
+-   We will learn: **cv.watershed()**
+
+Theory
+------
+
+Any grayscale image can be viewed as a topographic surface where high intensity denotes peaks and
+hills while low intensity denotes valleys. You start filling every isolated valley (local minimum)
+with differently colored water (labels). As the water rises, depending on the peaks (gradients)
+nearby, water from different valleys, obviously with different colors, will start to merge. To avoid
+that, you build barriers in the locations where water merges. You continue the work of filling water
+and building barriers until all the peaks are under water. The barriers you created then give you
+the segmentation result. This is the "philosophy" behind the watershed. You can visit the [CMM
+webpage on watershed](http://cmm.ensmp.fr/~beucher/wtshed.html) to understand it with the help of
+some animations.
+
+But this approach gives an oversegmented result due to noise or other irregularities in the
+image. So OpenCV implemented a marker-based watershed algorithm where you specify which valley
+points are to be merged and which are not. It is an interactive image segmentation. What we
+do is give different labels to the objects we know: label the region we are sure is
+foreground (the object) with one color (or intensity), label the region we are sure is
+background (non-object) with another color, and finally label the region we are not sure about
+with 0. That is our marker. Then apply the watershed algorithm. Our marker will be updated with
+the labels we gave, and the boundaries of objects will have a value of -1.
+
+Code
+----
+
+Below we will see an example on how to use the Distance Transform along with watershed to segment
+mutually touching objects.
+
+Consider the coins image below: the coins are touching each other. Even if you threshold it, they will
+still be touching each other.
+
+We start by finding an approximate estimate of the coins. For that, we can use Otsu's
+binarization.
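+
+A minimal sketch of this first step (assuming `src` is the coins image as a cv.Mat read from a canvas):
+@code{.js}
+let gray = new cv.Mat(), thresh = new cv.Mat();
+cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY, 0);
+// Otsu picks the threshold automatically; invert so the coins become white
+cv.threshold(gray, thresh, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU);
+@endcode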
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_watershed_threshold.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Now we need to remove any small white noise in the image. For that we can use morphological
+opening. To remove any small holes in the objects, we can use morphological closing. So, now we know
+for sure that the regions near the centers of the objects are foreground and the regions far away from
+the objects are background. The only region we are not sure about is the boundary region of the coins.
+
+So we need to extract the area which we are sure consists of coins. Erosion removes the boundary
+pixels, so whatever remains, we can be sure is a coin. That would work if the objects were not
+touching each other. But since they are touching each other, another good option is to find
+the distance transform and apply a proper threshold. Next, we need to find the area which we are sure
+does not consist of coins. For that, we dilate the result. Dilation expands the object boundary towards
+the background. This way, we can make sure that whatever region is background in the result is really
+background, since the boundary region has been removed. See the image below.
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_watershed_background.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+The remaining regions are those for which we have no idea whether they are coins or background.
+The watershed algorithm should decide. These areas are normally around the boundaries of the coins,
+where foreground and background meet (or even where two different coins meet). We call it the border.
+It can be obtained by subtracting the sure_fg area from the sure_bg area.
+
+We use the function: **cv.distanceTransform (src, dst, distanceType, maskSize, labelType = cv.CV_32F)**
+
+@param src           8-bit, single-channel (binary) source image.
+@param dst           output image with calculated distances. It is an 8-bit or 32-bit floating-point, single-channel image of the same size as src.
+@param distanceType  type of distance (see cv.DistanceTypes).
+@param maskSize      size of the distance transform mask (see cv.DistanceTransformMasks).
+@param labelType     type of output image. It can be cv.CV_8U or cv.CV_32F. Type cv.CV_8U can be used only for the first variant of the function and distanceType == DIST_L1.
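+
+A minimal sketch of extracting the sure-foreground area (assuming `opening` is the cleaned 8-bit binary cv.Mat from the previous step):
+@code{.js}
+let distTrans = new cv.Mat(), sureFg = new cv.Mat();
+cv.distanceTransform(opening, distTrans, cv.DIST_L2, 5);
+cv.normalize(distTrans, distTrans, 1, 0, cv.NORM_INF);  // scale distances to [0, 1]
+// keep only pixels far from any boundary: these are surely coins
+cv.threshold(distTrans, distTrans, 0.7, 255, cv.THRESH_BINARY);
+distTrans.convertTo(sureFg, cv.CV_8U, 1, 0);
+distTrans.delete();
+@endcode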
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_watershed_distanceTransform.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+In the thresholded image, we get some regions which we are sure are coins, and they are now
+detached. (In some cases, you may be interested only in foreground segmentation,
+not in separating mutually touching objects. In that case, you need not use the distance transform;
+erosion alone is sufficient. Erosion is just another method to extract the sure foreground area, that's
+all.)
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_watershed_foreground.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Now we know for sure which regions are coins, which are background, and so on. So we create a marker
+(an array of the same size as the original image, but with int32 datatype) and label the
+regions inside it. The regions we know for sure (whether foreground or background) are labelled with
+positive integers (a different integer for each), and the regions we are not sure about are left as
+zero. For this we use **cv.connectedComponents()**. It labels the background of the image with 0, and
+the other objects are labelled with integers starting from 1.
+
+But we know that if the background is marked with 0, watershed will consider it an unknown area. So we
+want to mark it with a different integer. Instead, we will mark the unknown region, defined by
+`unknown`, with 0.
+
+Now our marker is ready. It is time for the final step: apply watershed. The marker image will then be
+modified, and the boundary region will be marked with -1.
+
+We use the function: **cv.connectedComponents (image, labels, connectivity = 8, ltype = cv.CV_32S)**
+@param image         the 8-bit single-channel image to be labeled.
+@param labels        destination labeled image (of type cv.CV_32SC1).
+@param connectivity  8 or 4 for 8-way or 4-way connectivity respectively.
+@param ltype         output image label type. Currently cv.CV_32S and cv.CV_16U are supported.
+
+We use the function: **cv.watershed (image, markers)**
+
+@param image         input 8-bit 3-channel image.
+@param markers       input/output 32-bit single-channel image (map) of markers. It should have the same size as image.
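+
+A minimal sketch of the final labelling step (assuming `src` is the coins image as an 8-bit 3-channel RGB cv.Mat, and `sureFg` and `unknown` are the 8-bit masks built in the previous steps):
+@code{.js}
+let markers = new cv.Mat();
+cv.connectedComponents(sureFg, markers);
+for (let i = 0; i < markers.rows; i++) {
+    for (let j = 0; j < markers.cols; j++) {
+        // shift all labels by 1 so that the sure background becomes 1, not 0...
+        markers.intPtr(i, j)[0] = markers.intPtr(i, j)[0] + 1;
+        // ...and mark the unknown region with 0
+        if (unknown.ucharPtr(i, j)[0] === 255) {
+            markers.intPtr(i, j)[0] = 0;
+        }
+    }
+}
+cv.watershed(src, markers);  // boundaries are now labelled -1 in markers
+markers.delete();
+@endcode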
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_watershed_watershed.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_objdetect/js_face_detection/js_face_detection.markdown b/doc/js_tutorials/js_objdetect/js_face_detection/js_face_detection.markdown
new file mode 100644 (file)
index 0000000..a2787e4
--- /dev/null
@@ -0,0 +1,107 @@
+Face Detection using Haar Cascades {#tutorial_js_face_detection}
+==================================
+
+Goal
+----
+
+-   learn the basics of face detection using Haar Feature-based Cascade Classifiers
+-   extend the same for eye detection etc.
+
+Basics
+------
+
+Object Detection using Haar feature-based cascade classifiers is an effective method proposed by Paul Viola and Michael Jones in the 2001 paper, "Rapid Object Detection using a
+Boosted Cascade of Simple Features". It is a machine learning based approach in which a cascade
+function is trained from a lot of positive and negative images. It is then used to detect objects in
+other images.
+
+Here we will work with face detection. Initially, the algorithm needs a lot of positive images
+(images of faces) and negative images (images without faces) to train the classifier. Then we need
+to extract features from them. For this, the Haar features shown in the image below are used. They are
+just like our convolutional kernels. Each feature is a single value obtained by subtracting the sum of
+pixels under the white rectangle from the sum of pixels under the black rectangle.
+
+![image](images/haar_features.jpg)
+
+Now all possible sizes and locations of each kernel are used to calculate plenty of features. For each
+feature calculation, we need to find the sum of the pixels under the white and black rectangles. To
+solve this, they introduced the integral image. It reduces the calculation of the sum of pixels,
+however large the region may be, to an operation involving just four pixels.
+
+But among all these features we calculated, most are irrelevant. For example, consider the
+image below. The top row shows two good features. The first feature selected seems to focus on the
+property that the region of the eyes is often darker than the region of the nose and cheeks. The
+second feature selected relies on the property that the eyes are darker than the bridge of the nose.
+But the same windows applied to the cheeks or any other place are irrelevant. So how do we select the
+best features out of 160000+ features? This is achieved by **Adaboost**.
+
+![image](images/haar.png)
+
+For this, we apply each and every feature to all the training images. For each feature, it finds the
+best threshold which will classify the faces as positive and negative. Obviously, there will be
+errors or misclassifications. We select the features with the minimum error rate, which means they are
+the features that best classify the face and non-face images. (The process is not as simple as
+this. Each image is given an equal weight in the beginning. After each classification, the weights of
+misclassified images are increased. Then the same process is repeated: new error rates and new weights
+are calculated. The process continues until the required accuracy or error rate is achieved or the
+required number of features is found.)
+
+The final classifier is a weighted sum of these weak classifiers. They are called weak because alone
+they can't classify the image, but together with the others they form a strong classifier. The paper
+says even 200 features provide detection with 95% accuracy. Their final setup had around 6000 features.
+(Imagine a reduction from 160000+ features to 6000 features. That is a big gain.)
+
+So now you take an image. Take each 24x24 window. Apply 6000 features to it. Check if it is a face or
+not. Wow.. Wow.. Isn't that a little inefficient and time consuming? Yes, it is. The authors have a
+good solution for that.
+
+In an image, most of the image region is non-face region. So it is a better idea to have a simple
+method to check whether a window is not a face region. If it is not, discard it in a single shot and
+don't process it again. Instead, focus on regions where there can be a face. This way, we spend more
+time checking possible face regions.
+
+For this they introduced the concept of a **Cascade of Classifiers**. Instead of applying all 6000
+features to a window, the features are grouped into different stages of classifiers and applied one
+by one. (Normally the first few stages contain very few features.) If a window fails the first
+stage, discard it. We don't consider the remaining features on it. If it passes, apply the second stage
+of features and continue the process. The window which passes all stages is a face region. How is
+the plan !!!
+
+The authors' detector had 6000+ features in 38 stages, with 1, 10, 25, 25, and 50 features in the first
+five stages. (The two features in the above image are actually obtained as the best two features from
+Adaboost.) According to the authors, on average 10 features out of 6000+ are evaluated per
+sub-window.
+
+So this is a simple, intuitive explanation of how Viola-Jones face detection works. Read the paper for
+more details.
+
+Haar-cascade Detection in OpenCV
+--------------------------------
+
+Here we will deal with detection. OpenCV already contains many pre-trained classifiers for faces,
+eyes, smiles, etc. Those XML files are stored in the opencv/data/haarcascades/ folder. Let's create a
+face and eye detector with OpenCV.
+
+We use the function: **detectMultiScale (image, objects, scaleFactor = 1.1, minNeighbors = 3, flags = 0, minSize = new cv.Size(0, 0), maxSize = new cv.Size(0, 0))**
+
+@param image               matrix of the type CV_8U containing an image where objects are detected.
+@param objects             vector of rectangles where each rectangle contains the detected object. The rectangles may be partially outside the original image.
+@param scaleFactor         parameter specifying how much the image size is reduced at each image scale.
+@param minNeighbors        parameter specifying how many neighbors each candidate rectangle should have to retain it.
+@param flags               parameter with the same meaning for an old cascade as in the function cvHaarDetectObjects. It is not used for a new cascade.
+@param minSize             minimum possible object size. Objects smaller than this are ignored.
+@param maxSize             maximum possible object size. Objects larger than this are ignored. If maxSize == minSize, the model is evaluated on a single scale.
+
+@note Don't forget to delete CascadeClassifier and RectVector!
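+
+A minimal sketch (assuming `src` is an RGBA cv.Mat read from a canvas, and that the cascade file has already been placed in Emscripten's virtual filesystem under the name used below):
+@code{.js}
+let gray = new cv.Mat();
+cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY, 0);
+let faces = new cv.RectVector();
+let classifier = new cv.CascadeClassifier();
+classifier.load('haarcascade_frontalface_default.xml');
+classifier.detectMultiScale(gray, faces, 1.1, 3, 0);
+for (let i = 0; i < faces.size(); ++i) {
+    let face = faces.get(i);
+    let p1 = new cv.Point(face.x, face.y);
+    let p2 = new cv.Point(face.x + face.width, face.y + face.height);
+    cv.rectangle(src, p1, p2, [255, 0, 0, 255]);  // draw each detected face
+}
+gray.delete(); faces.delete(); classifier.delete();
+@endcode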
+
+Try it
+------
+
+Try this demo using the code above. Canvas elements named haarCascadeDetectionCanvasInput and haarCascadeDetectionCanvasOutput have been prepared. Choose an image and
+click `Try it` to see the result. You can change the code in the textbox to investigate more.
+
+\htmlonly
+<iframe src="../../js_face_detection.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_objdetect/js_face_detection/js_face_detection_camera.markdown b/doc/js_tutorials/js_objdetect/js_face_detection/js_face_detection_camera.markdown
new file mode 100644 (file)
index 0000000..58759ef
--- /dev/null
@@ -0,0 +1,15 @@
+Face Detection in Video Capture {#tutorial_js_face_detection_camera}
+==================================
+
+Goal
+----
+
+-   learn how to detect faces in video capture.
+
+@note  If you don't know how to capture video from camera, please review @ref tutorial_js_video_display.
+
+\htmlonly
+<iframe src="../../js_face_detection_camera.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_objdetect/js_table_of_contents_objdetect.markdown b/doc/js_tutorials/js_objdetect/js_table_of_contents_objdetect.markdown
new file mode 100644 (file)
index 0000000..3a6975f
--- /dev/null
@@ -0,0 +1,11 @@
+Object Detection {#tutorial_js_table_of_contents_objdetect}
+================
+
+-   @subpage tutorial_js_face_detection
+
+    Face detection using Haar cascades
+
+-   @subpage tutorial_js_face_detection_camera
+
+    Face Detection in Video Capture
diff --git a/doc/js_tutorials/js_setup/js_intro/js_intro.markdown b/doc/js_tutorials/js_setup/js_intro/js_intro.markdown
new file mode 100644 (file)
index 0000000..416aa3d
--- /dev/null
@@ -0,0 +1,45 @@
+Introduction to OpenCV.js and Tutorials {#tutorial_js_intro}
+=======================================
+
+OpenCV
+------
+
+OpenCV was created at Intel in 1999 by **Gary Bradski**. The first release came out in 2000. **Vadim Pisarevsky** joined Gary Bradski to manage Intel's Russian software OpenCV team. In 2005, OpenCV was used on Stanley, the vehicle that won the 2005 DARPA Grand Challenge. Later, its active development continued under the support of Willow Garage, with Gary Bradski and Vadim Pisarevsky leading the project. OpenCV now supports a multitude of algorithms related to Computer Vision and Machine Learning and is expanding day by day.
+
+OpenCV supports a wide variety of programming languages such as C++, Python, and Java, and is available on different platforms including Windows, Linux, OS X, Android, and iOS. Interfaces for high-speed GPU operations based on CUDA and OpenCL are also under active development. OpenCV.js brings OpenCV to the open web platform and makes it available to the JavaScript programmer.
+
+OpenCV.js: OpenCV for the JavaScript programmer
+-------------
+
+Web is the most ubiquitous open computing platform. With HTML5 standards implemented in every browser, web applications are able to render online video with HTML5 video tags, capture webcam video via WebRTC API, and access each pixel of a video frame via canvas API. With abundance of available multimedia content, web developers are in need of a wide array of image and vision processing algorithms in JavaScript to build innovative applications. This requirement is even more essential for emerging applications on the web, such as Web Virtual Reality (WebVR) and Augmented Reality (WebAR). All of these use cases demand efficient implementations of computation-intensive vision kernels on web.
+
+[Emscripten](http://kripken.github.io/emscripten-site) is an LLVM-to-JavaScript compiler. It takes LLVM bitcode - which can be generated from C/C++ using clang - and compiles it into asm.js or WebAssembly that can execute directly inside web browsers. Asm.js is a highly optimizable, low-level subset of JavaScript. Asm.js enables ahead-of-time compilation and optimization in the JavaScript engine that provides near-native execution speed. WebAssembly is a new portable, size- and load-time-efficient binary format suitable for compilation to the web. WebAssembly aims to execute at native speed. WebAssembly is currently being designed as an open standard by the W3C.
+
+OpenCV.js is a JavaScript binding for a selected subset of OpenCV functions for the web platform. It allows emerging web applications with multimedia processing to benefit from the wide variety of vision functions available in OpenCV. OpenCV.js leverages Emscripten to compile OpenCV functions into asm.js or WebAssembly targets, and provides a JavaScript API for web applications to access them. Future versions of the library will take advantage of acceleration APIs that are available on the Web such as SIMD and multi-threaded execution.
+
+OpenCV.js was initially created in the Parallel Architectures and Systems Group at the University of California, Irvine (UCI) as a research project funded by Intel Corporation. OpenCV.js was further improved and integrated into the OpenCV project as part of the Google Summer of Code 2017 program.
+
+OpenCV.js Tutorials
+-----------------------
+
+OpenCV introduces a new set of tutorials that will guide you through various functions available in OpenCV.js. **This guide is mainly focused on the OpenCV 3.x version**.
+
+The purpose of OpenCV.js tutorials is to:
+-# Help with the adoption of OpenCV in web development.
+-# Help the web community, developers, and computer vision researchers to interactively access a variety of web-based OpenCV examples and understand specific vision algorithms.
+
+Because OpenCV.js is able to run directly inside the browser, the OpenCV.js tutorial web pages are intuitive and interactive. For example, using the WebRTC API and evaluating JavaScript code allows developers to change the parameters of CV functions and do live CV coding on web pages to see the results in real time.
+
+Prior knowledge of JavaScript and web application development is recommended to understand this guide.
+
+Contributors
+------------
+
+Below is the list of contributors of OpenCV.js bindings and tutorials.
+
+-  Sajjad Taheri (Architect of the initial version and GSoC mentor, University of California, Irvine)
+-  Congxiang Pan (GSoC student, Shanghai Jiao Tong University)
+-  Gang Song (GSoC student, Shanghai Jiao Tong University)
+-  Wenyao Gan (Student intern, Shanghai Jiao Tong University)
+-  Mohammad Reza Haghighat (Project initiator & sponsor, Intel Corporation)
+-  Ningxin Hu (Students' supervisor, Intel Corporation)
\ No newline at end of file
diff --git a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown
new file mode 100644 (file)
index 0000000..5f6b43b
--- /dev/null
@@ -0,0 +1,105 @@
+Build OpenCV.js {#tutorial_js_setup}
+===============================
+
+
+Installing Emscripten
+-----------------------------
+
+[Emscripten](https://github.com/kripken/emscripten) is an LLVM-to-JavaScript compiler. We will use Emscripten to build OpenCV.js.
+
+To install Emscripten, follow the instructions of the [Emscripten SDK](https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html).
+
+For example:
+@code{.bash}
+./emsdk update
+./emsdk install latest
+./emsdk activate latest
+@endcode
+
+@note
+To compile to [WebAssembly](http://webassembly.org), you need to install and activate [Binaryen](https://github.com/WebAssembly/binaryen) with the `emsdk` command. Please refer to [Developer's Guide](http://webassembly.org/getting-started/developers-guide/) for more details.
+
+After installation, ensure that the `EMSCRIPTEN` environment variable is set up correctly.
+
+For example:
+@code{.bash}
+source ./emsdk_env.sh
+echo ${EMSCRIPTEN}
+@endcode
+
+Obtaining OpenCV Source Code
+--------------------------
+
+You can use the latest stable OpenCV version or you can grab the latest snapshot from our [Git
+repository](https://github.com/opencv/opencv.git).
+
+### Obtaining the Latest Stable OpenCV Version
+
+-   Go to our [releases page](http://opencv.org/releases.html).
+-   Download the source archive and unpack it.
+
+### Obtaining the Cutting-edge OpenCV from the Git Repository
+
+Launch a Git client and clone the [OpenCV repository](http://github.com/opencv/opencv).
+
+For example:
+@code{.bash}
+git clone https://github.com/opencv/opencv.git
+@endcode
+
+@note
+It requires `git` installed in your development environment.
+
+Building OpenCV.js from Source
+---------------------------------------
+
+-#  To build `opencv.js`, execute python script `<opencv_src_dir>/platforms/js/build_js.py <build_dir>`.
+
+    For example, to build in `build_js` directory:
+    @code{.bash}
+    cd opencv
+    python ./platforms/js/build_js.py build_js
+    @endcode
+
+    @note
+    It requires `python` and `cmake` installed in your development environment.
+
+-#  The build script builds asm.js version by default. To build WebAssembly version, append `--build_wasm` switch.
+
+    For example, to build wasm version in `build_wasm` directory:
+    @code{.bash}
+    python ./platforms/js/build_js.py build_wasm --build_wasm
+    @endcode
+
+-#  [optional] To build documents, append `--build_doc` option.
+
+    For example:
+    @code{.bash}
+    python ./platforms/js/build_js.py build_js --build_doc
+    @endcode
+
+    @note
+    It requires `doxygen` installed in your development environment.
+
+-#  [optional] To build tests, append `--build_test` option.
+
+    For example:
+    @code{.bash}
+    python ./platforms/js/build_js.py build_js --build_test
+    @endcode
+
+    To run the tests, launch a local web server in the \<build_dir\>/bin folder, for example Node's `http-server` serving on `localhost:8080`.
+
+    Navigate the web browser to `http://localhost:8080/tests.html`, which runs the unit tests automatically.
+
+    You can also run tests using Node.js.
+
+    For example:
+    @code{.sh}
+    cd bin
+    npm install
+    node tests.js
+    @endcode
+
+    @note
+    It requires `node` installed in your development environment.
diff --git a/doc/js_tutorials/js_setup/js_table_of_contents_setup.markdown b/doc/js_tutorials/js_setup/js_table_of_contents_setup.markdown
new file mode 100644 (file)
index 0000000..4570e28
--- /dev/null
@@ -0,0 +1,14 @@
+Introduction to OpenCV.js {#tutorial_js_table_of_contents_setup}
+======================
+
+-   @subpage tutorial_js_intro
+
+    Introduction of OpenCV.js and Tutorials
+
+-   @subpage tutorial_js_usage
+
+    Get started with OpenCV.js
+
+-   @subpage tutorial_js_setup
+
+    Build OpenCV.js from source
diff --git a/doc/js_tutorials/js_setup/js_usage/js_usage.markdown b/doc/js_tutorials/js_setup/js_usage/js_usage.markdown
new file mode 100644 (file)
index 0000000..72f481d
--- /dev/null
@@ -0,0 +1,140 @@
+Using OpenCV.js {#tutorial_js_usage}
+===============================
+
+Steps
+-----
+
+In this tutorial, you will learn how to include and start to use `opencv.js` inside a web page.
+
+### Create a web page
+
+First, let's create a simple web page that is able to upload an image.
+
+@code{.js}
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Hello OpenCV.js</title>
+</head>
+<body>
+<h2>Hello OpenCV.js</h2>
+<div>
+  <div class="inputoutput">
+    <img id="imageSrc" alt="No Image" />
+    <div class="caption">imageSrc <input type="file" id="fileInput" name="file" /></div>
+  </div>
+</div>
+<script type="text/javascript">
+let imgElement = document.getElementById("imageSrc")
+let inputElement = document.getElementById("fileInput");
+inputElement.addEventListener("change", (e) => {
+  imgElement.src = URL.createObjectURL(e.target.files[0]);
+}, false);
+</script>
+</body>
+</html>
+@endcode
+
+To run this web page, copy the content above and save it to a local index.html file, then open the file in your web browser.
+
+@note It is a better practice to use a local web server to host the index.html.
+
+### Include OpenCV.js
+
+Set the URL of `opencv.js` as the `src` attribute of a \<script\> tag.
+
+@note For this tutorial, we host `opencv.js` in the same folder as index.html.
+
+Example for synchronous loading:
+@code{.js}
+<script src="opencv.js" type="text/javascript"></script>
+@endcode
+
+You may want to load `opencv.js` asynchronously with the `async` attribute of the \<script\> tag. To be notified when `opencv.js` is ready, you can register a callback with the `onload` attribute.
+
+Example for asynchronous loading
+@code{.js}
+<script async src="opencv.js" onload="onOpenCvReady();" type="text/javascript"></script>
+@endcode
+
+### Use OpenCV.js
+
+Once `opencv.js` is ready, you can access OpenCV objects and functions through the `cv` object.
+
+For example, you can create a cv.Mat from an image by cv.imread.
+
+@note Because image loading is asynchronous, you need to put cv.Mat creation inside the `onload` callback.
+
+@code{.js}
+imgElement.onload = function() {
+  let mat = cv.imread(imgElement);
+}
+@endcode
+
+Many OpenCV functions can be used to process cv.Mat. You can refer to other tutorials, such as @ref tutorial_js_table_of_contents_imgproc, for details.
+
+In this tutorial, we just show a cv.Mat on screen. To show a cv.Mat, you need a canvas element.
+
+@code{.js}
+<canvas id="outputCanvas"></canvas>
+@endcode
+
+You can use cv.imshow to show a cv.Mat on the canvas. The first argument is the id of the canvas element, and the second is the cv.Mat:
+@code{.js}
+cv.imshow("outputCanvas", mat);
+@endcode
+
+Putting all of the steps together, the final index.html is shown below.
+
+@code{.js}
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Hello OpenCV.js</title>
+</head>
+<body>
+<h2>Hello OpenCV.js</h2>
+<p id="status">OpenCV.js is loading...</p>
+<div>
+  <div class="inputoutput">
+    <img id="imageSrc" alt="No Image" />
+    <div class="caption">imageSrc <input type="file" id="fileInput" name="file" /></div>
+  </div>
+  <div class="inputoutput">
+    <canvas id="canvasOutput" ></canvas>
+    <div class="caption">canvasOutput</div>
+  </div>
+</div>
+<script type="text/javascript">
+let imgElement = document.getElementById('imageSrc');
+let inputElement = document.getElementById('fileInput');
+inputElement.addEventListener('change', (e) => {
+  imgElement.src = URL.createObjectURL(e.target.files[0]);
+}, false);
+
+imgElement.onload = function() {
+  let mat = cv.imread(imgElement);
+  cv.imshow('canvasOutput', mat);
+  mat.delete();
+};
+
+function onOpenCvReady() {
+  document.getElementById('status').innerHTML = 'OpenCV.js is ready.';
+}
+</script>
+<script async src="opencv.js" onload="onOpenCvReady();" type="text/javascript"></script>
+</body>
+</html>
+@endcode
+
+@note You have to call the delete method of cv.Mat to free the memory allocated in Emscripten's heap. Please refer to [Memory management of Emscripten](https://kripken.github.io/emscripten-site/docs/porting/connecting_cpp_and_javascript/embind.html#memory-management) for details.
+
+Try it
+------
+\htmlonly
+<iframe src="../../js_setup_usage.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
\ No newline at end of file
diff --git a/doc/js_tutorials/js_tutorials.markdown b/doc/js_tutorials/js_tutorials.markdown
new file mode 100644 (file)
index 0000000..c8a8f92
--- /dev/null
@@ -0,0 +1,28 @@
+OpenCV.js Tutorials {#tutorial_js_root}
+=======================
+-   @subpage tutorial_js_table_of_contents_setup
+
+    Learn how to use OpenCV.js inside your web pages!
+
+-   @subpage tutorial_js_table_of_contents_gui
+
+    Here you will learn how to read and display images and videos, and create trackbars.
+
+-   @subpage tutorial_js_table_of_contents_core
+
+    In this section you will learn some basic operations on images, some mathematical tools and some data structures, etc.
+
+-   @subpage tutorial_js_table_of_contents_imgproc
+
+    In this section you will learn different image processing functions inside OpenCV.
+
+-   @subpage tutorial_js_table_of_contents_video
+
+    In this section you will learn different techniques to work with videos like object tracking etc.
+
+-   @subpage tutorial_js_table_of_contents_objdetect
+
+    In this section you will learn object detection techniques like face detection etc.
diff --git a/doc/js_tutorials/js_video/js_bg_subtraction/js_bg_subtraction.markdown b/doc/js_tutorials/js_video/js_bg_subtraction/js_bg_subtraction.markdown
new file mode 100644 (file)
index 0000000..a072dcc
--- /dev/null
@@ -0,0 +1,64 @@
+Background Subtraction {#tutorial_js_bg_subtraction}
+======================
+
+Goal
+----
+
+-   We will familiarize with the background subtraction methods available in OpenCV.js.
+
+Basics
+------
+
+Background subtraction is a major preprocessing step in many vision-based applications. For
+example, consider cases like a visitor counter, where a static camera counts the number of visitors
+entering or leaving a room, or a traffic camera extracting information about vehicles, etc. In
+all these cases, first you need to extract the person or vehicles alone. Technically, you need to
+extract the moving foreground from the static background.
+
+If you have an image of the background alone, like an image of the room without visitors or an image
+of the road without vehicles, it is an easy job: just subtract the new image from the background, and
+you get the foreground objects alone. But in most cases, you may not have such an image, so we need
+to extract the background from whatever images we have. It becomes more complicated when there are
+shadows of the vehicles. Since a shadow also moves, simple subtraction will mark it as
+foreground too. This complicates things.
+
+OpenCV.js has implemented one algorithm for this purpose, which is very easy to use.
+
+BackgroundSubtractorMOG2
+------------------------
+
+It is a Gaussian Mixture-based Background/Foreground Segmentation Algorithm. It is based on two
+papers by Z. Zivkovic: "Improved adaptive Gaussian mixture model for background subtraction" (2004)
+and "Efficient Adaptive Density Estimation per Image Pixel for the Task of Background Subtraction"
+(2006). One important feature of this algorithm is that it selects the appropriate number of
+Gaussian distributions for each pixel. It provides better adaptability to varying scenes due to
+illumination changes etc.
+
+While coding, we use the constructor: **cv.BackgroundSubtractorMOG2 (history = 500, varThreshold = 16,
+detectShadows = true)**
+@param history         Length of the history.
+@param varThreshold    Threshold on the squared distance between the pixel and the sample to decide
+whether a pixel is close to that sample. This parameter does not affect the background update.
+@param detectShadows   If true, the algorithm will detect shadows and mark them. It decreases the
+speed a bit, so if you do not need this feature, set the parameter to false.
+@return                instance of cv.BackgroundSubtractorMOG2
+
+Use **apply (image, fgmask, learningRate = -1)** method to get the foreground mask
+@param image         Next video frame. Floating point frame will be used without scaling and should
+be in range [0,255].
+@param fgmask        The output foreground mask as an 8-bit binary image.
+@param learningRate  The value between 0 and 1 that indicates how fast the background model is learnt.
+A negative value makes the algorithm use an automatically chosen learning rate. 0 means that the
+background model is not updated at all; 1 means that the background model is completely
+reinitialized from the last frame.
+
+@note The instance of cv.BackgroundSubtractorMOG2 should be deleted manually.
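+
+A minimal sketch of processing one frame (assuming `frame` is the current video frame as a cv.Mat):
+@code{.js}
+let fgbg = new cv.BackgroundSubtractorMOG2(500, 16, true);
+let fgmask = new cv.Mat(frame.rows, frame.cols, cv.CV_8UC1);
+fgbg.apply(frame, fgmask);   // fgmask now holds the foreground mask
+// ... call fgbg.apply() again for each subsequent frame ...
+fgmask.delete(); fgbg.delete();
+@endcode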
+
+Try it
+------
+
+\htmlonly
+<iframe src="../../js_bg_subtraction.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
diff --git a/doc/js_tutorials/js_video/js_lucas_kanade/images/optical_flow_basic1.jpg b/doc/js_tutorials/js_video/js_lucas_kanade/images/optical_flow_basic1.jpg
new file mode 100644 (file)
index 0000000..718d83c
Binary files /dev/null and b/doc/js_tutorials/js_video/js_lucas_kanade/images/optical_flow_basic1.jpg differ
diff --git a/doc/js_tutorials/js_video/js_lucas_kanade/js_lucas_kanade.markdown b/doc/js_tutorials/js_video/js_lucas_kanade/js_lucas_kanade.markdown
new file mode 100644 (file)
index 0000000..1d8fa29
--- /dev/null
@@ -0,0 +1,171 @@
+Optical Flow {#tutorial_js_lucas_kanade}
+============
+
+Goal
+----
+
+-   We will understand the concepts of optical flow and its estimation using Lucas-Kanade
+    method.
+-   We will use functions like **cv.calcOpticalFlowPyrLK()** to track feature points in a
+    video.
+
+Optical Flow
+------------
+
+Optical flow is the pattern of apparent motion of image objects between two consecutive frames
+caused by the movement of the object or the camera. It is a 2D vector field where each vector is a
+displacement vector showing the movement of points from the first frame to the second. Consider the image
+below (Image Courtesy: [Wikipedia article on Optical
+Flow](http://en.wikipedia.org/wiki/Optical_flow)).
+
+![image](images/optical_flow_basic1.jpg)
+
+It shows a ball moving in 5 consecutive frames. The arrow shows its displacement vector. Optical
+flow has many applications in areas like :
+
+-   Structure from Motion
+-   Video Compression
+-   Video Stabilization ...
+
+Optical flow works on several assumptions:
+
+-#  The pixel intensities of an object do not change between consecutive frames.
+-#  Neighbouring pixels have similar motion.
+
+Consider a pixel \f$I(x,y,t)\f$ in the first frame (note that a new dimension, time, is added here;
+earlier we were working with images only, so there was no need for time). It moves by distance
+\f$(dx,dy)\f$ in the next frame, taken after time \f$dt\f$. Since those pixels are the same and the
+intensity does not change, we can say,
+
+\f[I(x,y,t) = I(x+dx, y+dy, t+dt)\f]
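+
+Expanding the right-hand side with a first-order Taylor series gives
+
+\f[I(x+dx, y+dy, t+dt) \approx I(x,y,t) + \frac{\partial I}{\partial x} dx + \frac{\partial I}{\partial y} dy + \frac{\partial I}{\partial t} dt\f]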
+
+Cancelling the common term \f$I(x,y,t)\f$ and dividing by \f$dt\f$, we get the following
+equation:
+
+\f[f_x u + f_y v + f_t = 0 \;\f]
+
+where:
+
+\f[f_x = \frac{\partial f}{\partial x} \; ; \; f_y = \frac{\partial f}{\partial y}\f]\f[u = \frac{dx}{dt} \; ; \; v = \frac{dy}{dt}\f]
+
+The above equation is called the Optical Flow equation. In it, we can find \f$f_x\f$ and \f$f_y\f$:
+they are the image gradients. Similarly, \f$f_t\f$ is the gradient along time. But \f$(u,v)\f$ is
+unknown, and we cannot solve one equation with two unknown variables. So several methods have been
+proposed to solve this problem, and one of them is Lucas-Kanade.
+
+### Lucas-Kanade method
+
+We have seen the assumption before that all the neighbouring pixels have similar motion. The
+Lucas-Kanade method takes a 3x3 patch around the point, so all 9 points have the same motion. We
+can find \f$(f_x, f_y, f_t)\f$ for these 9 points, so our problem becomes solving 9 equations with
+two unknown variables, which is over-determined. A better solution is obtained with the least
+squares fit method. Below is the final solution, a two equation-two unknown problem, solved to get:
+
+\f[\begin{bmatrix} u \\ v \end{bmatrix} =
+\begin{bmatrix}
+    \sum_{i}{f_{x_i}}^2  &  \sum_{i}{f_{x_i} f_{y_i} } \\
+    \sum_{i}{f_{x_i} f_{y_i}} & \sum_{i}{f_{y_i}}^2
+\end{bmatrix}^{-1}
+\begin{bmatrix}
+    - \sum_{i}{f_{x_i} f_{t_i}} \\
+    - \sum_{i}{f_{y_i} f_{t_i}}
+\end{bmatrix}\f]
+
+(Note the similarity of the inverse matrix to the Harris corner detector. It indicates that corners
+are better points to track.)
+
+So from the user's point of view the idea is simple: we give some points to track and we receive
+the optical flow vectors of those points. But again there are some problems. Until now, we were
+dealing with small motions, so the method fails when there is large motion. To handle this we go for
+pyramids. When we go up in the pyramid, small motions are removed and large motions become small
+motions. So, applying Lucas-Kanade there, we get optical flow along with the scale.
+
+Lucas-Kanade Optical Flow in OpenCV.js
+-----------------------------------
+
+We use the function: **cv.calcOpticalFlowPyrLK (prevImg, nextImg, prevPts, nextPts, status, err, winSize =
+new cv.Size(21, 21), maxLevel = 3, criteria = new cv.TermCriteria(cv.TermCriteria_COUNT+
+cv.TermCriteria_EPS, 30, 0.01), flags = 0, minEigThreshold = 1e-4)**.
+@param prevImg          first 8-bit input image or pyramid constructed by buildOpticalFlowPyramid.
+@param nextImg          second input image or pyramid of the same size and the same type as prevImg.
+@param prevPts          vector of 2D points for which the flow needs to be found; point coordinates must
+be single-precision floating-point numbers.
+@param nextPts          output vector of 2D points (with single-precision floating-point coordinates)
+containing the calculated new positions of input features in the second image; when the
+cv.OPTFLOW_USE_INITIAL_FLOW flag is passed, the vector must have the same size as the input.
+@param status           output status vector (of unsigned chars); each element of the vector is set to 1
+if the flow for the corresponding features has been found, otherwise, it is set to 0.
+@param err              output vector of errors; each element of the vector is set to an error for the
+corresponding feature, type of the error measure can be set in flags parameter; if the flow wasn't
+found then the error is not defined (use the status parameter to find such cases).
+@param winSize          size of the search window at each pyramid level.
+@param maxLevel         0-based maximal pyramid level number; if set to 0, pyramids are not used (single
+level), if set to 1, two levels are used, and so on; if pyramids are passed to input then algorithm
+will use as many levels as pyramids have but no more than maxLevel.
+@param criteria         parameter specifying the termination criteria of the iterative search algorithm
+(after the specified maximum number of iterations criteria.maxCount or when the search window moves
+by less than criteria.epsilon).
+@param flags            operation flags:
+- cv.OPTFLOW_USE_INITIAL_FLOW uses initial estimations, stored in nextPts; if the flag is not set,
+then prevPts is copied to nextPts and is considered the initial estimate.
+- cv.OPTFLOW_LK_GET_MIN_EIGENVALS uses minimum eigenvalues as an error measure (see minEigThreshold
+description); if the flag is not set, then the L1 distance between patches around the original and a moved
+point, divided by the number of pixels in a window, is used as an error measure.
+@param minEigThreshold  the algorithm calculates the minimum eigenvalue of a 2x2 normal matrix of
+optical flow equations, divided by the number of pixels in a window; if this value is less than
+minEigThreshold, then the corresponding feature is filtered out and its flow is not processed,
+allowing bad points to be removed and giving a performance boost.
+
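+As a rough sketch of a single tracking step (here `oldGray`, `frameGray` and the point set `p0`,
+e.g. from **cv.goodFeaturesToTrack()**, are assumed to come from earlier setup):
+
+@code{.js}
+let p1 = new cv.Mat();
+let st = new cv.Mat();
+let err = new cv.Mat();
+let winSize = new cv.Size(15, 15);
+let criteria = new cv.TermCriteria(cv.TermCriteria_COUNT + cv.TermCriteria_EPS, 10, 0.03);
+cv.calcOpticalFlowPyrLK(oldGray, frameGray, p0, p1, st, err, winSize, 2, criteria);
+// keep only the points whose flow was found (status set to 1)
+for (let i = 0; i < st.rows; i++) {
+    if (st.data[i] === 1) {
+        let x = p1.data32F[i * 2];
+        let y = p1.data32F[i * 2 + 1];
+        // ... draw or store the tracked point (x, y)
+    }
+}
+p1.delete(); st.delete(); err.delete();
+@endcode
+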
+### Try it
+
+\htmlonly
+<iframe src="../../js_optical_flow_lucas_kanade.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+(This code doesn't check how correct the next keypoints are. So even if a feature point disappears
+in the image, there is a chance that optical flow finds a next point which merely looks close to it.
+For robust tracking, corner points should be re-detected at regular intervals.)
+
+Dense Optical Flow in OpenCV.js
+-------------------------------
+
+The Lucas-Kanade method computes optical flow for a sparse feature set (in our example, corners
+detected using the Shi-Tomasi algorithm). OpenCV.js provides another algorithm to find the dense
+optical flow. It computes the optical flow for all the points in the frame. It is based on Gunnar
+Farneback's algorithm, which is explained in "Two-Frame Motion Estimation Based on Polynomial
+Expansion" by Gunnar Farneback in 2003.
+
+We use the function: **cv.calcOpticalFlowFarneback (prev, next, flow, pyrScale, levels, winsize,
+iterations, polyN, polySigma, flags)**
+@param prev        first 8-bit single-channel input image.
+@param next        second input image of the same size and the same type as prev.
+@param flow        computed flow image that has the same size as prev and type CV_32FC2.
+@param pyrScale    parameter specifying the image scale (<1) to build pyramids for each image;
+pyrScale=0.5 means a classical pyramid, where each next layer is half the size of the previous one.
+@param levels      number of pyramid layers including the initial image; levels=1 means that no extra
+layers are created and only the original images are used.
+@param winsize     averaging window size; larger values increase the algorithm's robustness to image
+noise and give more chances for fast motion detection, but yield a more blurred motion field.
+@param iterations  number of iterations the algorithm does at each pyramid level.
+@param polyN       size of the pixel neighborhood used to find polynomial expansion in each pixel;
+larger values mean that the image will be approximated with smoother surfaces, yielding a more robust
+algorithm and a more blurred motion field; typically polyN = 5 or 7.
+@param polySigma   standard deviation of the Gaussian that is used to smooth derivatives used as a
+basis for the polynomial expansion; for polyN=5, you can set polySigma=1.1, for polyN=7, a good
+value would be polySigma=1.5.
+@param flags       operation flags that can be a combination of the following:
+- cv.OPTFLOW_USE_INITIAL_FLOW uses the input flow as an initial flow approximation.
+- cv.OPTFLOW_FARNEBACK_GAUSSIAN uses a Gaussian winsize x winsize filter instead of a box filter of
+the same size for optical flow estimation; usually, this option gives a more accurate flow than with
+a box filter, at the cost of lower speed; normally, winsize for a Gaussian window should be set to a
+larger value to achieve the same level of robustness.
+
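+A minimal sketch of one dense-flow step (assuming `prevGray` and `nextGray` are consecutive
+single-channel frames prepared earlier; the names are illustrative):
+
+@code{.js}
+let flow = new cv.Mat();
+cv.calcOpticalFlowFarneback(prevGray, nextGray, flow, 0.5, 3, 15, 3, 5, 1.2, 0);
+// flow is CV_32FC2: the (dx, dy) displacement of the pixel at (row, col)
+let row = 0, col = 0; // example pixel
+let dx = flow.data32F[(row * flow.cols + col) * 2];
+let dy = flow.data32F[(row * flow.cols + col) * 2 + 1];
+flow.delete();
+@endcode
+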
+### Try it
+
+\htmlonly
+<iframe src="../../js_optical_flow_dense.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
diff --git a/doc/js_tutorials/js_video/js_meanshift/images/camshift_face.gif b/doc/js_tutorials/js_video/js_meanshift/images/camshift_face.gif
new file mode 100644 (file)
index 0000000..d46e1c7
Binary files /dev/null and b/doc/js_tutorials/js_video/js_meanshift/images/camshift_face.gif differ
diff --git a/doc/js_tutorials/js_video/js_meanshift/images/meanshift_basics.jpg b/doc/js_tutorials/js_video/js_meanshift/images/meanshift_basics.jpg
new file mode 100644 (file)
index 0000000..73e513f
Binary files /dev/null and b/doc/js_tutorials/js_video/js_meanshift/images/meanshift_basics.jpg differ
diff --git a/doc/js_tutorials/js_video/js_meanshift/images/meanshift_face.gif b/doc/js_tutorials/js_video/js_meanshift/images/meanshift_face.gif
new file mode 100644 (file)
index 0000000..6f97331
Binary files /dev/null and b/doc/js_tutorials/js_video/js_meanshift/images/meanshift_face.gif differ
diff --git a/doc/js_tutorials/js_video/js_meanshift/js_meanshift.markdown b/doc/js_tutorials/js_video/js_meanshift/js_meanshift.markdown
new file mode 100644 (file)
index 0000000..3c3526b
--- /dev/null
@@ -0,0 +1,98 @@
+Meanshift and Camshift {#tutorial_js_meanshift}
+======================
+
+Goal
+----
+
+-   We will learn about Meanshift and Camshift algorithms to find and track objects in videos.
+
+Meanshift
+---------
+
+The intuition behind meanshift is simple. Consider you have a set of points (it can be a pixel
+distribution such as a histogram backprojection). You are given a small window (maybe a circle) and you
+have to move that window to the area of maximum pixel density (or maximum number of points). This is
+illustrated in the simple image below:
+
+![image](images/meanshift_basics.jpg)
+
+The initial window is shown as the blue circle named "C1". Its original center is marked by the blue
+rectangle named "C1_o". But if you find the centroid of the points inside that window, you will
+get the point "C1_r" (marked by the small blue circle), which is the real centroid of the window. Surely
+they don't match. So move your window such that the circle of the new window matches the previous
+centroid. Again find the new centroid. Most probably, it won't match. So move it again, and continue
+the iterations until the center of the window and its centroid fall on the same location (or within a
+small desired error). What you finally obtain is a window with maximum pixel distribution, marked by
+the green circle named "C2". As you can see in the image, it has the maximum number of points. The
+whole process is demonstrated on a static image below:
+
+![image](images/meanshift_face.gif)
+
+So we normally pass the histogram backprojected image and the initial target location. When the object
+moves, the movement is reflected in the histogram backprojected image. As a result, the meanshift
+algorithm moves our window to the new location with maximum density.
+
+### Meanshift in OpenCV.js
+
+To use meanshift in OpenCV.js, first we need to set up the target and find its histogram so that we
+can backproject the target on each frame for the calculation of meanshift. We also need to provide the
+initial location of the window. For the histogram, only Hue is considered here. Also, to avoid false
+values due to low light, low-light values are discarded using the **cv.inRange()** function.
+
+We use the function: **cv.meanShift (probImage, window, criteria)**
+@param probImage     Back projection of the object histogram. See cv.calcBackProject for details.
+@param window        Initial search window.
+@param criteria      Stop criteria for the iterative search algorithm.
+@return              number of iterations meanShift took to converge and the new location
+
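+A rough sketch of the per-frame loop (assuming the Hue histogram `roiHist`, the initial
+`trackWindow` and the current RGBA frame `frame` were prepared during setup; the names are
+illustrative):
+
+@code{.js}
+let hsv = new cv.Mat();
+let dst = new cv.Mat();
+let termCrit = new cv.TermCriteria(cv.TermCriteria_COUNT + cv.TermCriteria_EPS, 10, 1);
+cv.cvtColor(frame, hsv, cv.COLOR_RGBA2RGB);
+cv.cvtColor(hsv, hsv, cv.COLOR_RGB2HSV);
+let hsvVec = new cv.MatVector();
+hsvVec.push_back(hsv);
+cv.calcBackProject(hsvVec, [0], roiHist, dst, [0, 180], 1);
+// returns [number of iterations, new window]
+[, trackWindow] = cv.meanShift(dst, trackWindow, termCrit);
+cv.rectangle(frame, new cv.Point(trackWindow.x, trackWindow.y),
+             new cv.Point(trackWindow.x + trackWindow.width, trackWindow.y + trackWindow.height),
+             [255, 0, 0, 255], 2);
+hsvVec.delete(); hsv.delete(); dst.delete();
+@endcode
+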
+### Try it
+
+\htmlonly
+<iframe src="../../js_meanshift.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Camshift
+--------
+
+Did you closely watch the last result? There is a problem. Our window always has the same size, whether
+the object is farther away or very close to the camera. That is not good. We need to adapt the window
+size to the size and rotation of the target. Once again, the solution came from "OpenCV Labs" and it
+is called CAMShift (Continuously Adaptive Meanshift), published by Gary Bradski in his paper
+"Computer Vision Face Tracking for Use in a Perceptual User Interface" in 1998.
+
+It applies meanshift first. Once meanshift converges, it updates the size of the window as
+\f$s = 2 \times \sqrt{\frac{M_{00}}{256}}\f$. It also calculates the orientation of the best-fitting
+ellipse to it. It then applies meanshift again with the new scaled search window and the previous
+window location. The process continues until the required accuracy is met.
+
+![image](images/camshift_face.gif)
+
+### Camshift in OpenCV.js
+
+It is almost the same as meanshift, but it returns a rotated rectangle (that is our result) and box
+parameters (to be passed as the search window in the next iteration).
+
+We use the function: **cv.CamShift (probImage, window, criteria)**
+@param probImage     Back projection of the object histogram. See cv.calcBackProject for details.
+@param window        Initial search window.
+@param criteria      Stop criteria for the iterative search algorithm.
+@return              Rotated rectangle and the new search window
+
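+The per-frame call differs from the meanshift one only in its return values; a sketch (with `dst`,
+`trackWindow` and `termCrit` as in the meanshift example above):
+
+@code{.js}
+// returns [rotated rectangle, new search window]
+let [trackBox, newWindow] = cv.CamShift(dst, trackWindow, termCrit);
+trackWindow = newWindow;                   // feed back into the next iteration
+let pts = cv.rotatedRectPoints(trackBox);  // four corners of the rotated rect
+cv.line(frame, pts[0], pts[1], [255, 0, 0, 255], 3);
+cv.line(frame, pts[1], pts[2], [255, 0, 0, 255], 3);
+cv.line(frame, pts[2], pts[3], [255, 0, 0, 255], 3);
+cv.line(frame, pts[3], pts[0], [255, 0, 0, 255], 3);
+@endcode
+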
+### Try it
+
+\htmlonly
+<iframe src="../../js_camshift.html" width="100%"
+        onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
+</iframe>
+\endhtmlonly
+
+Additional Resources
+--------------------
+
+-#  French Wikipedia page on [Camshift](http://fr.wikipedia.org/wiki/Camshift). (The two animations
+    are taken from here)
+-#  Bradski, G.R., "Real time face and object tracking as a component of a perceptual user
+    interface," Proceedings of the Fourth IEEE Workshop on Applications of Computer Vision
+    (WACV '98), pp. 214-219, 19-21 Oct 1998
diff --git a/doc/js_tutorials/js_video/js_table_of_contents_video.markdown b/doc/js_tutorials/js_video/js_table_of_contents_video.markdown
new file mode 100644 (file)
index 0000000..193d53e
--- /dev/null
@@ -0,0 +1,17 @@
+Video Analysis {#tutorial_js_table_of_contents_video}
+==============
+
+-   @subpage tutorial_js_meanshift
+
+    Here, we will learn about tracking algorithms such as "Meanshift" and its upgraded version
+    "Camshift", used to find and track objects in videos.
+
+-   @subpage tutorial_js_lucas_kanade
+
+    Now let's discuss an important concept, "Optical Flow", which is related to videos and has many
+    applications.
+
+-   @subpage tutorial_js_bg_subtraction
+
+    In several applications, we need to extract the foreground for further operations like object
+    tracking. Background Subtraction is a well-known method for those cases.
index f4bb251..62b649d 100644 (file)
   pages = {513--522},
   organization = {BMVA Press}
 }
+@ARTICLE{fitzgibbon1999,
+  abstract = {This work presents a new efficient method for fitting
+  ellipses to scattered data. Previous algorithms either
+  fitted general conics or were computationally expensive. By
+  minimizing the algebraic distance subject to the constraint
+  4ac - b^2 = 1, the new method incorporates the
+  ellipticity constraint into the normalization factor. The
+  proposed method combines several advantages: It is
+  ellipse-specific, so that even bad data will always return
+  an ellipse. It can be solved naturally by a generalized
+  eigensystem. It is extremely robust, efficient, and easy to
+  implement},
+  author = {Fitzgibbon, Andrew and Pilu, Maurizio and Fisher, Robert B.},
+  doi = {10.1109/34.765658},
+  isbn = {0162-8828},
+  issn = {01628828},
+  journal = {IEEE Transactions on Pattern Analysis and Machine
+  Intelligence},
+  number = {5},
+  pages = {476--480},
+  pmid = {708},
+  title = {{Direct least square fitting of ellipses}},
+  volume = {21},
+  year = {1999}
+}
+@Article{taubin1991,
+  abstract = {The author addresses the problem of parametric
+  representation and estimation of complex planar curves in
+  2-D, surfaces in 3-D, and nonplanar space curves in 3-D.
+  Curves and surfaces can be defined either parametrically or
+  implicitly, with the latter representation used here. A
+  planar curve is the set of zeros of a smooth function of
+  two variables x-y, a surface is the set of zeros of a
+  smooth function of three variables x-y-z, and a space curve
+  is the intersection of two surfaces, which are the set of
+  zeros of two linearly independent smooth functions of three
+  variables x-y-z. For example, the surface of a complex
+  object in 3-D can be represented as a subset of a single
+  implicit surface, with similar results for planar and space
+  curves. It is shown how this unified representation can be
+  used for object recognition, object position estimation,
+  and segmentation of objects into meaningful subobjects,
+  that is, the detection of `interest regions' that are more
+  complex than high curvature regions and, hence, more useful
+  as features for object recognition},
+  author = {Taubin, Gabriel},
+  doi = {10.1109/34.103273},
+  isbn = {0162-8828},
+  issn = {01628828},
+  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+  number = {11},
+  pages = {1115--1138},
+  title = {{Estimation of planar curves, surfaces, and nonplanar
+  space curves defined by implicit equations with
+  applications to edge and range image segmentation}},
+  volume = {13},
+  year = {1991}
+}
 @INPROCEEDINGS{G11,
   author = {Grundmann, Matthias and Kwatra, Vivek and Essa, Irfan},
   title = {Auto-directed video stabilization with robust l1 optimal camera paths},
index 1305d8a..f2b1aa0 100644 (file)
@@ -130,7 +130,7 @@ Or
 >>> b = img[:,:,0]
 @endcode
 Suppose, you want to make all the red pixels to zero, you need not split like this and put it equal
-to zero. You can simply use Numpy indexing, and that is more faster.
+to zero. You can simply use Numpy indexing, and that is faster.
 @code{.py}
 >>> img[:,:,2] = 0
 @endcode
index a047154..1c6ef13 100644 (file)
@@ -140,7 +140,7 @@ FLANN based Matcher
 
 FLANN stands for Fast Library for Approximate Nearest Neighbors. It contains a collection of
 algorithms optimized for fast nearest neighbor search in large datasets and for high dimensional
-features. It works more faster than BFMatcher for large datasets. We will see the second example
+features. It works faster than BFMatcher for large datasets. We will see the second example
 with FLANN based matcher.
 
 For FLANN based matcher, we need to pass two dictionaries which specifies the algorithm to be used,
index dab2add..7c9456f 100644 (file)
@@ -34,7 +34,7 @@ applications, rotation invariance is not required, so no need of finding this or
 speeds up the process. SURF provides such a functionality called Upright-SURF or U-SURF. It improves
 speed and is robust upto \f$\pm 15^{\circ}\f$. OpenCV supports both, depending upon the flag,
 **upright**. If it is 0, orientation is calculated. If it is 1, orientation is not calculated and it
-is more faster.
+is faster.
 
 ![image](images/surf_orientation.jpg)
 
@@ -130,7 +130,7 @@ False
 
 >>> plt.imshow(img2),plt.show()
 @endcode
-See the results below. All the orientations are shown in same direction. It is more faster than
+See the results below. All the orientations are shown in the same direction. It is faster than
 previous. If you are working on cases where orientation is not a problem (like panorama stitching)
 etc, this is better.
 
index 1e2e8aa..94e249a 100644 (file)
@@ -101,7 +101,7 @@ Additional Resources
 --------------------
 
 -#  Canny edge detector at [Wikipedia](http://en.wikipedia.org/wiki/Canny_edge_detector)
--#  [Canny Edge Detection Tutorial](http://dasl.mem.drexel.edu/alumni/bGreen/www.pages.drexel.edu/_weg22/can_tut.html) by
+-#  [Canny Edge Detection Tutorial](http://dasl.unlv.edu/daslDrexel/alumni/bGreen/www.pages.drexel.edu/_weg22/can_tut.html) by
     Bill Green, 2002.
 
 Exercises
index 7d7eaac..27c0734 100644 (file)
@@ -99,7 +99,7 @@ as 0-0.99, 1-1.99, 2-2.99 etc. So final range would be 255-255.99. To represent
 np.histogram(). So for one-dimensional histograms, you can better try that. Don't forget to set
 minlength = 256 in np.bincount. For example, hist = np.bincount(img.ravel(),minlength=256)
 
-@note OpenCV function is more faster than (around 40X) than np.histogram(). So stick with OpenCV
+@note The OpenCV function is faster (around 40X) than np.histogram(). So stick with the OpenCV
 function.
 
 Now we should plot histograms, but how?
index 73be69c..2441477 100644 (file)
@@ -4,6 +4,7 @@ OpenCV modules {#mainpage}
 - @ref intro
 - @ref tutorial_root
 - @ref tutorial_py_root
+@CMAKE_DOXYGEN_TUTORIAL_JS_ROOT@
 @CMAKE_DOXYGEN_TUTORIAL_CONTRIB_ROOT@
 - @ref faq
 - @ref citelist
index 753b368..e915d04 100644 (file)
@@ -1,16 +1,18 @@
 Smoothing Images {#tutorial_gausian_median_blur_bilateral_filter}
 ================
 
+@next_tutorial{tutorial_erosion_dilatation}
+
 Goal
 ----
 
 In this tutorial you will learn how to apply diverse linear filters to smooth images using OpenCV
 functions such as:
 
--   @ref cv::blur
--   @ref cv::GaussianBlur
--   @ref cv::medianBlur
--   @ref cv::bilateralFilter
+-   **blur()**
+-   **GaussianBlur()**
+-   **medianBlur()**
+-   **bilateralFilter()**
 
 Theory
 ------
@@ -92,38 +94,65 @@ Code
     -   Loads an image
     -   Applies 4 different kinds of filters (explained in Theory) and show the filtered images
         sequentially
+
+@add_toggle_cpp
+-   **Downloadable code**: Click
+    [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp)
+
+-   **Code at glance:**
+    @include samples/cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp
+@end_toggle
+
+@add_toggle_java
 -   **Downloadable code**: Click
-    [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgProc/Smoothing.cpp)
+    [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgProc/Smoothing/Smoothing.java)
+
+-   **Code at glance:**
+    @include samples/java/tutorial_code/ImgProc/Smoothing/Smoothing.java
+@end_toggle
+
+@add_toggle_python
+-   **Downloadable code**: Click
+    [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/imgProc/Smoothing/smoothing.py)
+
 -   **Code at glance:**
-    @include samples/cpp/tutorial_code/ImgProc/Smoothing.cpp
+    @include samples/python/tutorial_code/imgProc/Smoothing/smoothing.py
+@end_toggle
 
 Explanation
 -----------
 
--#  Let's check the OpenCV functions that involve only the smoothing procedure, since the rest is
-    already known by now.
--#  **Normalized Block Filter:**
+Let's check the OpenCV functions that involve only the smoothing procedure, since the rest is
+already known by now.
 
-    OpenCV offers the function @ref cv::blur to perform smoothing with this filter.
-    @snippet cpp/tutorial_code/ImgProc/Smoothing.cpp blur
+#### Normalized Block Filter:
 
+-   OpenCV offers the function **blur()** to perform smoothing with this filter.
     We specify 4 arguments (more details, check the Reference):
-
     -   *src*: Source image
     -   *dst*: Destination image
-    -   *Size( w,h )*: Defines the size of the kernel to be used ( of width *w* pixels and height
+    -   *Size( w, h )*: Defines the size of the kernel to be used ( of width *w* pixels and height
         *h* pixels)
     -   *Point(-1, -1)*: Indicates where the anchor point (the pixel evaluated) is located with
         respect to the neighborhood. If there is a negative value, then the center of the kernel is
         considered the anchor point.
 
--#  **Gaussian Filter:**
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp blur
+@end_toggle
 
-    It is performed by the function @ref cv::GaussianBlur :
-    @snippet cpp/tutorial_code/ImgProc/Smoothing.cpp gaussianblur
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/Smoothing/Smoothing.java blur
+@end_toggle
 
-    Here we use 4 arguments (more details, check the OpenCV reference):
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/Smoothing/smoothing.py blur
+@end_toggle
+
+#### Gaussian Filter:
 
+-   It is performed by the function **GaussianBlur()** :
+    Here we use 4 arguments (more details, check the OpenCV reference):
     -   *src*: Source image
     -   *dst*: Destination image
     -   *Size(w, h)*: The size of the kernel to be used (the neighbors to be considered). \f$w\f$ and
@@ -134,35 +163,65 @@ Explanation
     -   \f$\sigma_{y}\f$: The standard deviation in y. Writing \f$0\f$ implies that \f$\sigma_{y}\f$ is
         calculated using kernel size.
 
--#  **Median Filter:**
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp gaussianblur
+@end_toggle
 
-    This filter is provided by the @ref cv::medianBlur function:
-    @snippet cpp/tutorial_code/ImgProc/Smoothing.cpp medianblur
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/Smoothing/Smoothing.java gaussianblur
+@end_toggle
 
-    We use three arguments:
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/Smoothing/smoothing.py gaussianblur
+@end_toggle
+
+#### Median Filter:
 
+-   This filter is provided by the **medianBlur()** function:
+    We use three arguments:
     -   *src*: Source image
     -   *dst*: Destination image, must be the same type as *src*
     -   *i*: Size of the kernel (only one because we use a square window). Must be odd.
 
--#  **Bilateral Filter**
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp medianblur
+@end_toggle
 
-    Provided by OpenCV function @ref cv::bilateralFilter
-    @snippet cpp/tutorial_code/ImgProc/Smoothing.cpp bilateralfilter
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/Smoothing/Smoothing.java medianblur
+@end_toggle
 
-    We use 5 arguments:
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/Smoothing/smoothing.py medianblur
+@end_toggle
+
+#### Bilateral Filter
 
+-   Provided by OpenCV function **bilateralFilter()**
+    We use 5 arguments:
     -   *src*: Source image
     -   *dst*: Destination image
     -   *d*: The diameter of each pixel neighborhood.
     -   \f$\sigma_{Color}\f$: Standard deviation in the color space.
     -   \f$\sigma_{Space}\f$: Standard deviation in the coordinate space (in pixel terms)
 
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp bilateralfilter
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/Smoothing/Smoothing.java bilateralfilter
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/Smoothing/smoothing.py bilateralfilter
+@end_toggle
+
 Results
 -------
 
--   The code opens an image (in this case *lena.jpg*) and display it under the effects of the 4
-    filters explained.
+-   The code opens an image (in this case [lena.jpg](https://raw.githubusercontent.com/opencv/opencv/master/samples/data/lena.jpg))
+    and displays it under the effects of the 4 filters explained.
 -   Here is a snapshot of the image smoothed using *medianBlur*:
 
     ![](images/Smoothing_Tutorial_Result_Median_Filter.jpg)
index 085e7c2..c55f092 100644 (file)
@@ -1,22 +1,23 @@
 Hit-or-Miss {#tutorial_hitOrMiss}
 =================================
 
+@prev_tutorial{tutorial_opening_closing_hats}
+@next_tutorial{tutorial_morph_lines_detection}
+
 Goal
 ----
 
 In this tutorial you will learn how to find a given configuration or pattern in a binary image by using the Hit-or-Miss transform (also known as Hit-and-Miss transform).
 This transform is also the basis of more advanced morphological operations such as thinning or pruning.
 
-We will use the OpenCV function @ref cv::morphologyEx.
-
-
+We will use the OpenCV function **morphologyEx()**.
 
 Hit-or-Miss theory
 -------------------
 
 Morphological operators process images based on their shape. These operators apply one or more *structuring elements* to an input image to obtain the output image.
 The two basic morphological operations are the *erosion* and the *dilation*. The combination of these two operations generate advanced morphological transformations such as *opening*, *closing*, or *top-hat* transform.
-To know more about these and other basic morphological operations refer to previous tutorials @ref tutorial_erosion_dilatation "here" and @ref tutorial_opening_closing_hats "here".
+To know more about these and other basic morphological operations refer to previous tutorials (@ref tutorial_erosion_dilatation "Eroding and Dilating") and (@ref tutorial_opening_closing_hats "More Morphology Transformations").
 
 The Hit-or-Miss transformation is useful to find patterns in binary images. In particular, it finds those pixels whose neighbourhood matches the shape of a first structuring element \f$B_1\f$
 while not matching the shape of a second structuring element \f$B_2\f$ at the same time. Mathematically, the operation applied to an image \f$A\f$ can be expressed as follows:
@@ -43,11 +44,27 @@ You can see that the pattern is found in just one location within the image.
 Code
 ----
 
-The code corresponding to the previous example is shown below. You can also download it from
-[here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgProc/HitMiss.cpp)
-@include samples/cpp/tutorial_code/ImgProc/HitMiss.cpp
+The code corresponding to the previous example is shown below.
+
+@add_toggle_cpp
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgProc/HitMiss/HitMiss.cpp)
+@include samples/cpp/tutorial_code/ImgProc/HitMiss/HitMiss.cpp
+@end_toggle
+
+@add_toggle_java
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgProc/HitMiss/HitMiss.java)
+@include samples/java/tutorial_code/ImgProc/HitMiss/HitMiss.java
+@end_toggle
+
+@add_toggle_python
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/imgProc/HitMiss/hit_miss.py)
+@include samples/python/tutorial_code/imgProc/HitMiss/hit_miss.py
+@end_toggle
 
-As you can see, it is as simple as using the function @ref cv::morphologyEx with the operation type @ref cv::MORPH_HITMISS and the chosen kernel.
+As you can see, it is as simple as using the function **morphologyEx()** with the operation type **MORPH_HITMISS** and the chosen kernel.
 
 Other examples
 --------------
index 6b6efdf..8a4bbc0 100644 (file)
@@ -1,12 +1,15 @@
 Adding borders to your images {#tutorial_copyMakeBorder}
 =============================
 
+@prev_tutorial{tutorial_filter_2d}
+@next_tutorial{tutorial_sobel_derivatives}
+
 Goal
 ----
 
 In this tutorial you will learn how to:
 
--   Use the OpenCV function @ref cv::copyMakeBorder to set the borders (extra padding to your
+-   Use the OpenCV function **copyMakeBorder()** to set the borders (extra padding to your
     image).
 
 Theory
@@ -30,10 +33,7 @@ Theory
 
     This will be seen more clearly in the Code section.
 
-Code
-----
-
--#  **What does this program do?**
+-   **What does this program do?**
     -   Load an image
     -   Let the user choose what kind of padding use in the input image. There are two options:
 
@@ -45,38 +45,153 @@ Code
         The user chooses either option by pressing 'c' (constant) or 'r' (replicate)
     -   The program finishes when the user presses 'ESC'
 
--#  The tutorial code's is shown lines below. You can also download it from
-    [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp)
-    @include samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
+Code
+----
+
+The tutorial code is shown in the lines below.
+
+@add_toggle_cpp
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp)
+@include samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
+@end_toggle
+
+@add_toggle_java
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java)
+@include samples/java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java
+@end_toggle
+
+@add_toggle_python
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py)
+@include samples/python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py
+@end_toggle
 
 Explanation
 -----------
 
--#  First we declare the variables we are going to use:
-    @snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp variables
+#### Declare the variables
+
+First we declare the variables we are going to use:
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp variables
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java variables
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py variables
+@end_toggle
+
+The variable *rng* deserves special attention: it is a random number generator. We use it to
+generate the random border color, as we will see soon.
+
+#### Load an image
+
+As usual we load our source image *src*:
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp load
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java load
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py load
+@end_toggle
+
+#### Create a window
+
+After giving a short intro of how to use the program, we create a window:
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp create_window
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java create_window
+@end_toggle
 
-    Especial attention deserves the variable *rng* which is a random number generator. We use it to
-    generate the random border color, as we will see soon.
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py create_window
+@end_toggle
 
--#  As usual we load our source image *src*:
-    @snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp load
+#### Initialize arguments
 
--#  After giving a short intro of how to use the program, we create a window:
-    @snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp create_window
--#  Now we initialize the argument that defines the size of the borders (*top*, *bottom*, *left* and
-    *right*). We give them a value of 5% the size of *src*.
-    @snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp init_arguments
--#  The program runs in a **for** loop. If the user presses 'c' or 'r', the *borderType* variable
-    takes the value of *BORDER_CONSTANT* or *BORDER_REPLICATE* respectively:
-    @snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp check_keypress
--#  In each iteration (after 0.5 seconds), the variable *value* is updated...
-    @snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp update_value
-    with a random value generated by the **RNG** variable *rng*. This value is a number picked
-    randomly in the range \f$[0,255]\f$
+Now we initialize the arguments that define the size of the borders (*top*, *bottom*, *left* and
+*right*). We give them a value of 5% of the size of *src*.
 
--#  Finally, we call the function @ref cv::copyMakeBorder to apply the respective padding:
-    @snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp copymakeborder
-    The arguments are:
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp init_arguments
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java init_arguments
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py init_arguments
+@end_toggle
+
+#### Loop
+
+The program runs in an infinite loop until the **ESC** key is pressed.
+If the user presses '**c**' or '**r**', the *borderType* variable
+takes the value of *BORDER_CONSTANT* or *BORDER_REPLICATE* respectively:
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp check_keypress
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java check_keypress
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py check_keypress
+@end_toggle
+
+#### Random color
+
+In each iteration (after 0.5 seconds), the random border color (*value*) is updated...
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp update_value
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java update_value
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py update_value
+@end_toggle
+
+This value is a set of three numbers picked randomly in the range \f$[0,255]\f$.
+
+#### Form a border around the image
+
+Finally, we call the function **copyMakeBorder()** to apply the respective padding:
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp copymakeborder
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java copymakeborder
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py copymakeborder
+@end_toggle
+
+-   The arguments are:
 
     -#  *src*: Source image
     -#  *dst*: Destination image
@@ -87,8 +202,21 @@ Explanation
     -#  *value*: If *borderType* is *BORDER_CONSTANT*, this is the value used to fill the border
         pixels.
 
--#  We display our output image in the image created previously
-    @snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp display
+#### Display the results
+
+We display our output image in the window created previously
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp display
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java display
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py display
+@end_toggle
 
 Results
 -------
index 5716410..454f745 100644 (file)
@@ -1,12 +1,15 @@
 Making your own linear filters! {#tutorial_filter_2d}
 ===============================
 
+@prev_tutorial{tutorial_threshold_inRange}
+@next_tutorial{tutorial_copyMakeBorder}
+
 Goal
 ----
 
 In this tutorial you will learn how to:
 
--   Use the OpenCV function @ref cv::filter2D to create your own linear filters.
+-   Use the OpenCV function **filter2D()** to create your own linear filters.
 
 Theory
 ------
@@ -40,61 +43,127 @@ Expressing the procedure above in the form of an equation we would have:
 
 \f[H(x,y) = \sum_{i=0}^{M_{i} - 1} \sum_{j=0}^{M_{j}-1} I(x+i - a_{i}, y + j - a_{j})K(i,j)\f]
 
-Fortunately, OpenCV provides you with the function @ref cv::filter2D so you do not have to code all
+Fortunately, OpenCV provides you with the function **filter2D()** so you do not have to code all
 these operations.
 
-Code
-----
-
--#  **What does this program do?**
-    -   Loads an image
-    -   Performs a *normalized box filter*. For instance, for a kernel of size \f$size = 3\f$, the
-        kernel would be:
+###  What does this program do?
+-   Loads an image
+-   Performs a *normalized box filter*. For instance, for a kernel of size \f$size = 3\f$, the
+    kernel would be:
 
-        \f[K = \dfrac{1}{3 \cdot 3} \begin{bmatrix}
-        1 & 1 & 1  \\
+\f[K = \dfrac{1}{3 \cdot 3} \begin{bmatrix}
+1 & 1 & 1  \\
         1 & 1 & 1  \\
         1 & 1 & 1
-        \end{bmatrix}\f]
+\end{bmatrix}\f]
+
+The program will perform the filter operation with kernels of sizes 3, 5, 7, 9 and 11.
 
-        The program will perform the filter operation with kernels of sizes 3, 5, 7, 9 and 11.
+-   The filter output (with each kernel) will be shown for 500 milliseconds
 
-    -   The filter output (with each kernel) will be shown during 500 milliseconds
+Code
+----
 
--#  The tutorial code's is shown lines below. You can also download it from
-    [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgTrans/filter2D_demo.cpp)
-    @include cpp/tutorial_code/ImgTrans/filter2D_demo.cpp
+The tutorial code is shown in the lines below.
+
+@add_toggle_cpp
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgTrans/filter2D_demo.cpp)
+@include cpp/tutorial_code/ImgTrans/filter2D_demo.cpp
+@end_toggle
+
+@add_toggle_java
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgTrans/Filter2D/Filter2D_Demo.java)
+@include java/tutorial_code/ImgTrans/Filter2D/Filter2D_Demo.java
+@end_toggle
+
+@add_toggle_python
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/ImgTrans/Filter2D/filter2D.py)
+@include python/tutorial_code/ImgTrans/Filter2D/filter2D.py
+@end_toggle
 
 Explanation
 -----------
 
--#  Load an image
-    @snippet cpp/tutorial_code/ImgTrans/filter2D_demo.cpp load
--#  Initialize the arguments for the linear filter
-    @snippet cpp/tutorial_code/ImgTrans/filter2D_demo.cpp init_arguments
--#  Perform an infinite loop updating the kernel size and applying our linear filter to the input
-    image. Let's analyze that more in detail:
--#  First we define the kernel our filter is going to use. Here it is:
-    @snippet cpp/tutorial_code/ImgTrans/filter2D_demo.cpp update_kernel
-    The first line is to update the *kernel_size* to odd values in the range: \f$[3,11]\f$. The second
-    line actually builds the kernel by setting its value to a matrix filled with \f$1's\f$ and
-    normalizing it by dividing it between the number of elements.
-
--#  After setting the kernel, we can generate the filter by using the function @ref cv::filter2D :
-    @snippet cpp/tutorial_code/ImgTrans/filter2D_demo.cpp apply_filter
-    The arguments denote:
-
-    -#  *src*: Source image
-    -#  *dst*: Destination image
-    -#  *ddepth*: The depth of *dst*. A negative value (such as \f$-1\f$) indicates that the depth is
+####  Load an image
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/filter2D_demo.cpp load
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/Filter2D/Filter2D_Demo.java load
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/Filter2D/filter2D.py load
+@end_toggle
+
+####  Initialize the arguments
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/filter2D_demo.cpp init_arguments
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/Filter2D/Filter2D_Demo.java init_arguments
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/Filter2D/filter2D.py init_arguments
+@end_toggle
+
+#### Loop
+
+Perform an infinite loop updating the kernel size and applying our linear filter to the input
+image. Let's analyze that more in detail:
+
+-  First we define the kernel our filter is going to use. Here it is:
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/filter2D_demo.cpp update_kernel
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/Filter2D/Filter2D_Demo.java update_kernel
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/Filter2D/filter2D.py update_kernel
+@end_toggle
+
+The first line updates the *kernel_size* to odd values in the range \f$[3,11]\f$.
+The second line actually builds the kernel by setting its value to a matrix filled with
+\f$1's\f$ and normalizing it by dividing it by the number of elements.
+
+-  After setting the kernel, we can generate the filter by using the function **filter2D()** :
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/filter2D_demo.cpp apply_filter
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgTrans/Filter2D/Filter2D_Demo.java apply_filter
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/ImgTrans/Filter2D/filter2D.py apply_filter
+@end_toggle
+
+-  The arguments denote:
+       -  *src*: Source image
+       -  *dst*: Destination image
+       -  *ddepth*: The depth of *dst*. A negative value (such as \f$-1\f$) indicates that the depth is
         the same as the source.
-    -#  *kernel*: The kernel to be scanned through the image
-    -#  *anchor*: The position of the anchor relative to its kernel. The location *Point(-1, -1)*
-        indicates the center by default.
-    -#  *delta*: A value to be added to each pixel during the correlation. By default it is \f$0\f$
-    -#  *BORDER_DEFAULT*: We let this value by default (more details in the following tutorial)
+       -  *kernel*: The kernel to be scanned through the image
+       -  *anchor*: The position of the anchor relative to its kernel. The location *Point(-1, -1)*
+       indicates the center by default.
+       -  *delta*: A value to be added to each pixel during the correlation. By default it is \f$0\f$
+       -  *BORDER_DEFAULT*: We leave this value as default (more details in the following tutorial)
 
+-  Our program will run a *while* loop; every 500 ms the kernel size of our filter will be
+-  Our program will effectuate a *while* loop, each 500 ms the kernel size of our filter will be
     updated in the range indicated.
 
 Results
@@ -104,4 +173,4 @@ Results
     result should be a window that shows an image blurred by a normalized filter. Each 0.5 seconds
     the kernel size should change, as can be seen in the series of snapshots below:
 
-    ![](images/filter_2d_tutorial_result.jpg)
+![](images/filter_2d_tutorial_result.jpg)
index 36c03cd..fe2f88b 100644 (file)
@@ -1,12 +1,15 @@
 Hough Circle Transform {#tutorial_hough_circle}
 ======================
 
+@prev_tutorial{tutorial_hough_lines}
+@next_tutorial{tutorial_remap}
+
 Goal
 ----
 
 In this tutorial you will learn how to:
 
--   Use the OpenCV function @ref cv::HoughCircles to detect circles in an image.
+-   Use the OpenCV function **HoughCircles()** to detect circles in an image.
 
 Theory
 ------
@@ -31,31 +34,96 @@ Theory
     the best radius for each candidate center. For more details, please check the book *Learning
     OpenCV* or your favorite Computer Vision bibliography
 
+####  What does this program do?
+-   Loads an image and blurs it to reduce the noise
+-   Applies the *Hough Circle Transform* to the blurred image.
+-   Displays the detected circle in a window.
+
 Code
 ----
 
--#  **What does this program do?**
-    -   Loads an image and blur it to reduce the noise
-    -   Applies the *Hough Circle Transform* to the blurred image .
-    -   Display the detected circle in a window.
-
--#  The sample code that we will explain can be downloaded from [here](https://github.com/opencv/opencv/tree/master/samples/cpp/houghcircles.cpp).
-    A slightly fancier version (which shows trackbars for
-    changing the threshold values) can be found [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp).
-    @include samples/cpp/houghcircles.cpp
+@add_toggle_cpp
+The sample code that we will explain can be downloaded from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp).
+A slightly fancier version (which shows trackbars for changing the threshold values) can be found
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp).
+@include samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp
+@end_toggle
+
+@add_toggle_java
+The sample code that we will explain can be downloaded from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java).
+@include samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java
+@end_toggle
+
+@add_toggle_python
+The sample code that we will explain can be downloaded from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py).
+@include samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py
+@end_toggle
 
 Explanation
 -----------
 
--#  Load an image
-    @snippet samples/cpp/houghcircles.cpp load
--#  Convert it to grayscale:
-    @snippet samples/cpp/houghcircles.cpp convert_to_gray
--#  Apply a Median blur to reduce noise and avoid false circle detection:
-    @snippet samples/cpp/houghcircles.cpp reduce_noise
--#  Proceed to apply Hough Circle Transform:
-    @snippet samples/cpp/houghcircles.cpp houghcircles
-    with the arguments:
+The image we used can be found [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/data/smarties.png)
+
+####  Load an image:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp load
+@end_toggle
+
+@add_toggle_java
+@snippet samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py load
+@end_toggle
+
+@add_toggle_python
+@snippet samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java load
+@end_toggle
+
+####  Convert it to grayscale:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp convert_to_gray
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java convert_to_gray
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py convert_to_gray
+@end_toggle
+
+#### Apply a Median blur to reduce noise and avoid false circle detection:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp reduce_noise
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java reduce_noise
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py reduce_noise
+@end_toggle
+
+#### Proceed to apply Hough Circle Transform:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp houghcircles
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java houghcircles
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py houghcircles
+@end_toggle
+
+-   with the arguments:
 
     -   *gray*: Input image (grayscale).
     -   *circles*: A vector that stores sets of 3 values: \f$x_{c}, y_{c}, r\f$ for each detected
@@ -69,16 +137,39 @@ Explanation
     -   *min_radius = 0*: Minimum radius to be detected. If unknown, put zero as default.
     -   *max_radius = 0*: Maximum radius to be detected. If unknown, put zero as default.
 
--#  Draw the detected circles:
-    @snippet samples/cpp/houghcircles.cpp draw
-    You can see that we will draw the circle(s) on red and the center(s) with a small green dot
+####  Draw the detected circles:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp draw
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java draw
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py draw
+@end_toggle
+
+You can see that we will draw the circle(s) in red and the center(s) with a small green dot.
+
+####  Display the detected circle(s) and wait for the user to exit the program:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp display
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java display
+@end_toggle
 
--#  Display the detected circle(s) and wait for the user to exit the program:
-    @snippet samples/cpp/houghcircles.cpp display
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py display
+@end_toggle
 
 Result
 ------
 
 The result of running the code above with a test image is shown below:
 
-![](images/Hough_Circle_Tutorial_Result.jpg)
+![](images/Hough_Circle_Tutorial_Result.png)
diff --git a/doc/tutorials/imgproc/imgtrans/hough_circle/images/Hough_Circle_Tutorial_Result.png b/doc/tutorials/imgproc/imgtrans/hough_circle/images/Hough_Circle_Tutorial_Result.png
new file mode 100644 (file)
index 0000000..b19ee00
Binary files /dev/null and b/doc/tutorials/imgproc/imgtrans/hough_circle/images/Hough_Circle_Tutorial_Result.png differ
index 584c3f8..8b24d87 100644 (file)
@@ -1,12 +1,15 @@
 Hough Line Transform {#tutorial_hough_lines}
 ====================
 
+@prev_tutorial{tutorial_canny_detector}
+@next_tutorial{tutorial_hough_circle}
+
 Goal
 ----
 
 In this tutorial you will learn how to:
 
--   Use the OpenCV functions @ref cv::HoughLines and @ref cv::HoughLinesP to detect lines in an
+-   Use the OpenCV functions **HoughLines()** and **HoughLinesP()** to detect lines in an
     image.
 
 Theory
@@ -79,54 +82,93 @@ a.  **The Standard Hough Transform**
 
 -   It consists in pretty much what we just explained in the previous section. It gives you as
     result a vector of couples \f$(\theta, r_{\theta})\f$
--   In OpenCV it is implemented with the function @ref cv::HoughLines
+-   In OpenCV it is implemented with the function **HoughLines()**
 
 b.  **The Probabilistic Hough Line Transform**
 
 -   A more efficient implementation of the Hough Line Transform. It gives as output the extremes
     of the detected lines \f$(x_{0}, y_{0}, x_{1}, y_{1})\f$
--   In OpenCV it is implemented with the function @ref cv::HoughLinesP
+-   In OpenCV it is implemented with the function **HoughLinesP()**
+
+###  What does this program do?
+-   Loads an image
+-   Applies a *Standard Hough Line Transform* and a *Probabilistic Line Transform*.
+-   Displays the original image and the detected lines in three windows.
 
 Code
 ----
 
--#  **What does this program do?**
-    -   Loads an image
-    -   Applies either a *Standard Hough Line Transform* or a *Probabilistic Line Transform*.
-    -   Display the original image and the detected line in two windows.
-
--#  The sample code that we will explain can be downloaded from [here](https://github.com/opencv/opencv/tree/master/samples/cpp/houghlines.cpp). A slightly fancier version
-    (which shows both Hough standard and probabilistic with trackbars for changing the threshold
-    values) can be found [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgTrans/HoughLines_Demo.cpp).
-    @include samples/cpp/houghlines.cpp
+@add_toggle_cpp
+The sample code that we will explain can be downloaded from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgTrans/houghlines.cpp).
+A slightly fancier version (which shows both Hough standard and probabilistic
+with trackbars for changing the threshold values) can be found
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgTrans/HoughLines_Demo.cpp).
+@include samples/cpp/tutorial_code/ImgTrans/houghlines.cpp
+@end_toggle
+
+@add_toggle_java
+The sample code that we will explain can be downloaded from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java).
+@include samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java
+@end_toggle
+
+@add_toggle_python
+The sample code that we will explain can be downloaded from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py).
+@include samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py
+@end_toggle
 
 Explanation
 -----------
 
--#  Load an image
-    @code{.cpp}
-    Mat src = imread(filename, 0);
-    if(src.empty())
-    {
-      help();
-      cout << "can not open " << filename << endl;
-      return -1;
-    }
-    @endcode
--#  Detect the edges of the image by using a Canny detector
-    @code{.cpp}
-    Canny(src, dst, 50, 200, 3);
-    @endcode
-    Now we will apply the Hough Line Transform. We will explain how to use both OpenCV functions
-    available for this purpose:
-
--#  **Standard Hough Line Transform**
-    -#  First, you apply the Transform:
-        @code{.cpp}
-        vector<Vec2f> lines;
-        HoughLines(dst, lines, 1, CV_PI/180, 100, 0, 0 );
-        @endcode
-        with the following arguments:
+#### Load an image
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghlines.cpp load
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java load
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py load
+@end_toggle
+
+#### Detect the edges of the image by using a Canny detector
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghlines.cpp edge_detection
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java edge_detection
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py edge_detection
+@end_toggle
+
+Now we will apply the Hough Line Transform. We will explain how to use both OpenCV functions
+available for this purpose.
+
+#### Standard Hough Line Transform
+First, you apply the Transform:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghlines.cpp hough_lines
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java hough_lines
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py hough_lines
+@end_toggle
+
+-   with the following arguments:
 
         -   *dst*: Output of the edge detector. It should be a grayscale image (although in fact it
             is a binary one)
@@ -137,28 +179,35 @@ Explanation
         -   *threshold*: The minimum number of intersections to "*detect*" a line
         -   *srn* and *stn*: Default parameters to zero. Check OpenCV reference for more info.
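+
+For reference, with these arguments the call boils down to (as in the previous inline version of this sample):
+@code{.cpp}
+std::vector<Vec2f> lines;                        // will hold the results of the detection (rho, theta)
+HoughLines(dst, lines, 1, CV_PI/180, 100, 0, 0); // rho = 1 pixel, theta = 1 degree, threshold = 100
+@endcode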
 
-    -#  And then you display the result by drawing the lines.
-        @code{.cpp}
-        for( size_t i = 0; i < lines.size(); i++ )
-        {
-          float rho = lines[i][0], theta = lines[i][1];
-          Point pt1, pt2;
-          double a = cos(theta), b = sin(theta);
-          double x0 = a*rho, y0 = b*rho;
-          pt1.x = cvRound(x0 + 1000*(-b));
-          pt1.y = cvRound(y0 + 1000*(a));
-          pt2.x = cvRound(x0 - 1000*(-b));
-          pt2.y = cvRound(y0 - 1000*(a));
-          line( cdst, pt1, pt2, Scalar(0,0,255), 3, LINE_AA);
-        }
-        @endcode
--#  **Probabilistic Hough Line Transform**
-    -#  First you apply the transform:
-        @code{.cpp}
-        vector<Vec4i> lines;
-        HoughLinesP(dst, lines, 1, CV_PI/180, 50, 50, 10 );
-        @endcode
-        with the arguments:
+And then you display the result by drawing the lines.
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghlines.cpp draw_lines
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java draw_lines
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py draw_lines
+@end_toggle
+
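+In C++ this essentially converts each \f$(\theta, r_{\theta})\f$ pair into two distant points on the detected line and draws a segment between them (essentially the loop from the previous inline version of this sample):
+@code{.cpp}
+for( size_t i = 0; i < lines.size(); i++ )
+{
+    float rho = lines[i][0], theta = lines[i][1];
+    double a = cos(theta), b = sin(theta);
+    double x0 = a*rho, y0 = b*rho;    // point on the line closest to the origin
+    Point pt1(cvRound(x0 + 1000*(-b)), cvRound(y0 + 1000*(a)));
+    Point pt2(cvRound(x0 - 1000*(-b)), cvRound(y0 - 1000*(a)));
+    line( cdst, pt1, pt2, Scalar(0,0,255), 3, LINE_AA );
+}
+@endcode
+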
+#### Probabilistic Hough Line Transform
+First you apply the transform:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghlines.cpp hough_lines_p
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java hough_lines_p
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py hough_lines_p
+@end_toggle
+
+-   with the arguments:
 
         -   *dst*: Output of the edge detector. It should be a grayscale image (although in fact it
             is a binary one)
@@ -172,23 +221,47 @@ Explanation
             this number of points are disregarded.
         -   *maxLineGap*: The maximum gap between two points to be considered in the same line.
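+
+For reference, with these arguments the call boils down to (as in the previous inline version of this sample):
+@code{.cpp}
+std::vector<Vec4i> lines;                          // will hold the results (x0, y0, x1, y1)
+HoughLinesP(dst, lines, 1, CV_PI/180, 50, 50, 10); // threshold = 50, minLineLength = 50, maxLineGap = 10
+@endcode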
 
-    -#  And then you display the result by drawing the lines.
-        @code{.cpp}
-        for( size_t i = 0; i < lines.size(); i++ )
-        {
-          Vec4i l = lines[i];
-          line( cdst, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0,0,255), 3, LINE_AA);
-        }
-        @endcode
--#  Display the original image and the detected lines:
-    @code{.cpp}
-    imshow("source", src);
-    imshow("detected lines", cdst);
-    @endcode
--#  Wait until the user exits the program
-    @code{.cpp}
-    waitKey();
-    @endcode
+And then you display the result by drawing the lines.
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghlines.cpp draw_lines_p
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java draw_lines_p
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py draw_lines_p
+@end_toggle
+
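+Each detected segment already comes with its two endpoints, so the drawing step is direct (essentially the loop from the previous inline version of this sample):
+@code{.cpp}
+for( size_t i = 0; i < lines.size(); i++ )
+{
+    Vec4i l = lines[i];
+    line( cdst, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0,0,255), 3, LINE_AA );
+}
+@endcode
+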
+#### Display the original image and the detected lines
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghlines.cpp imshow
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java imshow
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py imshow
+@end_toggle
+
+#### Wait until the user exits the program
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgTrans/houghlines.cpp exit
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java exit
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py exit
+@end_toggle
 
 Result
 ------
@@ -198,13 +271,11 @@ Result
     section. It still implements the same stuff as above, only adding the Trackbar for the
     Threshold.
 
-Using an input image such as:
-
-![](images/Hough_Lines_Tutorial_Original_Image.jpg)
-
-We get the following result by using the Probabilistic Hough Line Transform:
-
-![](images/Hough_Lines_Tutorial_Result.jpg)
+Using an input image such as a [sudoku image](https://raw.githubusercontent.com/opencv/opencv/master/samples/data/sudoku.png),
+we get the following result with the Standard Hough Line Transform:
+
+![](images/hough_lines_result1.png)
+
+And with the Probabilistic Hough Line Transform:
+
+![](images/hough_lines_result2.png)
 
 You may observe that the number of lines detected varies while you change the *threshold*. The
 explanation is sort of evident: If you establish a higher threshold, fewer lines will be detected
diff --git a/doc/tutorials/imgproc/imgtrans/hough_lines/images/hough_lines_result1.png b/doc/tutorials/imgproc/imgtrans/hough_lines/images/hough_lines_result1.png
new file mode 100644 (file)
index 0000000..012cee5
Binary files /dev/null and b/doc/tutorials/imgproc/imgtrans/hough_lines/images/hough_lines_result1.png differ
diff --git a/doc/tutorials/imgproc/imgtrans/hough_lines/images/hough_lines_result2.png b/doc/tutorials/imgproc/imgtrans/hough_lines/images/hough_lines_result2.png
new file mode 100644 (file)
index 0000000..a4dda92
Binary files /dev/null and b/doc/tutorials/imgproc/imgtrans/hough_lines/images/hough_lines_result2.png differ
index f178170..63aed35 100644 (file)
@@ -1,12 +1,15 @@
 Laplace Operator {#tutorial_laplace_operator}
 ================
 
+@prev_tutorial{tutorial_sobel_derivatives}
+@next_tutorial{tutorial_canny_detector}
+
 Goal
 ----
 
 In this tutorial you will learn how to:
 
--   Use the OpenCV function @ref cv::Laplacian to implement a discrete analog of the *Laplacian
+-   Use the OpenCV function **Laplacian()** to implement a discrete analog of the *Laplacian
     operator*.
 
 Theory
@@ -37,7 +40,7 @@ Theory
 
 \f[Laplace(f) = \dfrac{\partial^{2} f}{\partial x^{2}} + \dfrac{\partial^{2} f}{\partial y^{2}}\f]
 
--#  The Laplacian operator is implemented in OpenCV by the function @ref cv::Laplacian . In fact,
+-#  The Laplacian operator is implemented in OpenCV by the function **Laplacian()**. In fact,
     since the Laplacian uses the gradient of images, it calls internally the *Sobel* operator to
     perform its computation.
 
@@ -50,25 +53,98 @@ Code
     -   Applies a Laplacian operator to the grayscale image and stores the output image
     -   Display the result in a window
 
+@add_toggle_cpp
 -#  The tutorial code is shown in the lines below. You can also download it from
-    [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp)
+    [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp)
     @include samples/cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp
+@end_toggle
+
+@add_toggle_java
+-#  The tutorial code is shown in the lines below. You can also download it from
+    [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java)
+    @include samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java
+@end_toggle
+
+@add_toggle_python
+-#  The tutorial code is shown in the lines below. You can also download it from
+    [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py)
+    @include samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py
+@end_toggle
 
 Explanation
 -----------
 
--#  Create some needed variables:
-    @snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp variables
--#  Loads the source image:
-    @snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp load
--#  Apply a Gaussian blur to reduce noise:
-    @snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp reduce_noise
--#  Convert the image to grayscale using @ref cv::cvtColor
-    @snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp convert_to_gray
--#  Apply the Laplacian operator to the grayscale image:
-    @snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp laplacian
-    where the arguments are:
+#### Declare variables
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp variables
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java variables
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py variables
+@end_toggle
+
+#### Load source image
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp load
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java load
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py load
+@end_toggle
+
+#### Reduce noise
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp reduce_noise
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java reduce_noise
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py reduce_noise
+@end_toggle
 
+#### Grayscale
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp convert_to_gray
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java convert_to_gray
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py convert_to_gray
+@end_toggle
+
+#### Laplacian operator
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp laplacian
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java laplacian
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py laplacian
+@end_toggle
+
+-   The arguments are:
     -   *src_gray*: The input image.
     -   *dst*: Destination (output) image
     -   *ddepth*: Depth of the destination image. Since our input is *CV_8U* we define *ddepth* =
@@ -77,10 +153,33 @@ Explanation
         this example.
     -   *scale*, *delta* and *BORDER_DEFAULT*: We leave them as default values.
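+
+Putting these together, the call is essentially (a sketch, assuming the variable names above and kernel_size = 3):
+@code{.cpp}
+Laplacian( src_gray, dst, CV_16S, 3, 1, 0, BORDER_DEFAULT );  // ddepth, ksize, scale, delta, border
+@endcode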
 
--#  Convert the output from the Laplacian operator to a *CV_8U* image:
-    @snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp convert
--#  Display the result in a window:
-    @snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp display
+#### Convert output to a *CV_8U* image
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp convert
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java convert
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py convert
+@end_toggle
+
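+A sketch of this conversion (assuming the names *dst* and *abs_dst* for the Laplacian output and its 8-bit version):
+@code{.cpp}
+// scale, take absolute values and convert back to CV_8U
+convertScaleAbs( dst, abs_dst );
+@endcode
+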
+#### Display the result
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp display
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java display
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py display
+@end_toggle
 
 Results
 -------
index 3112b08..f6010fc 100644 (file)
@@ -1,13 +1,16 @@
 Sobel Derivatives {#tutorial_sobel_derivatives}
 =================
 
+@prev_tutorial{tutorial_copyMakeBorder}
+@next_tutorial{tutorial_laplace_operator}
+
 Goal
 ----
 
 In this tutorial you will learn how to:
 
--   Use the OpenCV function @ref cv::Sobel to calculate the derivatives from an image.
--   Use the OpenCV function @ref cv::Scharr to calculate a more accurate derivative for a kernel of
+-   Use the OpenCV function **Sobel()** to calculate the derivatives from an image.
+-   Use the OpenCV function **Scharr()** to calculate a more accurate derivative for a kernel of
     size \f$3 \cdot 3\f$
 
 Theory
@@ -83,7 +86,7 @@ Assuming that the image to be operated is \f$I\f$:
 @note
     When the size of the kernel is `3`, the Sobel kernel shown above may produce noticeable
     inaccuracies (after all, Sobel is only an approximation of the derivative). OpenCV addresses
-    this inaccuracy for kernels of size 3 by using the @ref cv::Scharr function. This is as fast
+    this inaccuracy for kernels of size 3 by using the **Scharr()** function. This is as fast
     but more accurate than the standard Sobel function. It implements the following kernels:
     \f[G_{x} = \begin{bmatrix}
     -3 & 0 & +3  \\
@@ -95,9 +98,9 @@ Assuming that the image to be operated is \f$I\f$:
     +3 & +10 & +3
     \end{bmatrix}\f]
 @note
-    You can check out more information of this function in the OpenCV reference (@ref cv::Scharr ).
-    Also, in the sample code below, you will notice that above the code for @ref cv::Sobel function
-    there is also code for the @ref cv::Scharr function commented. Uncommenting it (and obviously
+    You can check out more information about this function in the OpenCV reference (**Scharr()**).
+    Also, in the sample code below, you will notice that above the code for the **Sobel()** function
+    there is commented-out code for the **Scharr()** function. Uncommenting it (and obviously
     commenting the Sobel stuff) should give you an idea of how this function works.
 
 Code
@@ -107,28 +110,55 @@ Code
     -   Applies the *Sobel Operator* and generates as output an image with the detected *edges*
         bright on a darker background.
 
--#  The tutorial code's is shown lines below. You can also download it from
-    [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp)
-    @include samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
+-#  The tutorial code is shown in the lines below.
+
+@add_toggle_cpp
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp)
+@include samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
+@end_toggle
+
+@add_toggle_java
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgTrans/SobelDemo/SobelDemo.java)
+@include samples/java/tutorial_code/ImgTrans/SobelDemo/SobelDemo.java
+@end_toggle
+
+@add_toggle_python
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/ImgTrans/SobelDemo/sobel_demo.py)
+@include samples/python/tutorial_code/ImgTrans/SobelDemo/sobel_demo.py
+@end_toggle
 
 Explanation
 -----------
 
--#  First we declare the variables we are going to use:
-    @snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp variables
--#  As usual we load our source image *src*:
-    @snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp load
--#  First, we apply a @ref cv::GaussianBlur to our image to reduce the noise ( kernel size = 3 )
-    @snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp reduce_noise
--#  Now we convert our filtered image to grayscale:
-    @snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp convert_to_gray
--#  Second, we calculate the "*derivatives*" in *x* and *y* directions. For this, we use the
-    function @ref cv::Sobel as shown below:
-    @snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp sobel
+#### Declare variables
+
+@snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp variables
+
+#### Load source image
+
+@snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp load
+
+#### Reduce noise
+
+@snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp reduce_noise
+
+#### Grayscale
+
+@snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp convert_to_gray
+
+#### Sobel Operator
+
+@snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp sobel
+
+-   We calculate the "derivatives" in *x* and *y* directions. For this, we use the
+    function **Sobel()** as shown above.
     The function takes the following arguments:
 
     -   *src_gray*: In our example, the input image. Here it is *CV_8U*
-    -   *grad_x*/*grad_y*: The output image.
+    -   *grad_x* / *grad_y*: The output image.
     -   *ddepth*: The depth of the output image. We set it to *CV_16S* to avoid overflow.
     -   *x_order*: The order of the derivative in **x** direction.
     -   *y_order*: The order of the derivative in **y** direction.
@@ -137,13 +167,20 @@ Explanation
     Notice that to calculate the gradient in *x* direction we use: \f$x_{order}= 1\f$ and
     \f$y_{order} = 0\f$. We do analogously for the *y* direction.
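+
+A sketch of the two calls this amounts to (argument names as described above, kernel size 3, scale and delta left at their defaults):
+@code{.cpp}
+Sobel( src_gray, grad_x, CV_16S, 1, 0, 3 );  // gradient X: x_order = 1, y_order = 0
+Sobel( src_gray, grad_y, CV_16S, 0, 1, 3 );  // gradient Y: x_order = 0, y_order = 1
+@endcode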
 
--#  We convert our partial results back to *CV_8U*:
-    @snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp convert
--#  Finally, we try to approximate the *gradient* by adding both directional gradients (note that
-    this is not an exact calculation at all! but it is good for our purposes).
-    @snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp blend
--#  Finally, we show our result:
-    @snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp display
+#### Convert output to a CV_8U image
+
+@snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp convert
+
+#### Gradient
+
+@snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp blend
+
+We try to approximate the *gradient* by adding both directional gradients (note that
+this is not an exact calculation at all, but it is good for our purposes).
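+
+A minimal sketch of that blend, assuming the converted gradients *abs_grad_x* and *abs_grad_y* from the previous step:
+@code{.cpp}
+// grad ~ 0.5*|dI/dx| + 0.5*|dI/dy|
+addWeighted( abs_grad_x, 0.5, abs_grad_y, 0.5, 0, grad );
+@endcode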
+
+#### Show results
+
+@snippet cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp display
 
 Results
 -------
diff --git a/doc/tutorials/imgproc/morph_lines_detection/moprh_lines_detection.md b/doc/tutorials/imgproc/morph_lines_detection/moprh_lines_detection.md
deleted file mode 100644 (file)
index 23b748d..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-Extract horizontal and vertical lines by using morphological operations {#tutorial_moprh_lines_detection}
-=============
-
-Goal
-----
-
-In this tutorial you will learn how to:
-
--   Apply two very common morphology operators (i.e. Dilation and Erosion), with the creation of custom kernels, in order to extract straight lines on the horizontal and vertical axes. For this purpose, you will use the following OpenCV functions:
-    -   @ref cv::erode
-    -   @ref cv::dilate
-    -   @ref cv::getStructuringElement
-
-    in an example where your goal will be to extract the music notes from a music sheet.
-
-Theory
-------
-
-### Morphology Operations
-Morphology is a set of image processing operations that process images based on predefined *structuring elements* known also as kernels. The value of each pixel in the output image is based on a comparison of the corresponding pixel in the input image with its neighbors. By choosing the size and shape of the kernel, you can construct a morphological operation that is sensitive to specific shapes regarding the input image.
-
-Two of the most basic morphological operations are dilation and erosion. Dilation adds pixels to the boundaries of the object in an image, while erosion does exactly the opposite. The amount of pixels added or removed, respectively depends on the size and shape of the structuring element used to process the image. In general the rules followed from these two operations have as follows:
-
--   __Dilation__: The value of the output pixel is the <b><em>maximum</em></b> value of all the pixels that fall within the structuring element's size and shape. For example in a binary image, if any of the pixels of the input image falling within the range of the kernel is set to the value 1, the corresponding pixel of the output image will be set to 1 as well. The latter applies to any type of image (e.g. grayscale, bgr, etc).
-
-    ![Dilation on a Binary Image](images/morph21.gif)
-
-    ![Dilation on a Grayscale Image](images/morph6.gif)
-
--   __Erosion__: The vise versa applies for the erosion operation. The value of the output pixel is the <b><em>minimum</em></b> value of all the pixels that fall within the structuring element's size and shape. Look the at the example figures below:
-
-    ![Erosion on a Binary Image](images/morph211.png)
-
-    ![Erosion on a Grayscale Image](images/morph61.png)
-
-### Structuring Elements
-
-As it can be seen above and in general in any morphological operation the structuring element used to probe the input image, is the most important part.
-
-A structuring element is a matrix consisting of only 0's and 1's that can have any arbitrary shape and size. Typically are much smaller than the image being processed, while the pixels with values of 1 define the neighborhood. The center pixel of the structuring element, called the origin, identifies the pixel of interest -- the pixel being processed.
-
-For example, the following illustrates a diamond-shaped structuring element of 7x7 size.
-
-![A Diamond-Shaped Structuring Element and its Origin](images/morph12.gif)
-
-A structuring element can have many common shapes, such as lines, diamonds, disks, periodic lines, and circles and sizes. You typically choose a structuring element the same size and shape as the objects you want to process/extract in the input image. For example, to find lines in an image, create a linear structuring element as you will see later.
-
-Code
-----
-
-This tutorial code's is shown lines below. You can also download it from [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp).
-@include samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp
-
-Explanation / Result
---------------------
-
--#  Load the source image and check if it is loaded without any problem, then show it:
-    @snippet samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp load_image
-    ![](images/src.png)
-
--#  Then transform image to grayscale if it not already:
-    @snippet samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp gray
-    ![](images/gray.png)
-
--#  Afterwards transform grayscale image to binary. Notice the ~ symbol which indicates that we use the inverse (i.e. bitwise_not) version of it:
-    @snippet samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp bin
-    ![](images/binary.png)
-
--#  Now we are ready to apply morphological operations in order to extract the horizontal and vertical lines and as a consequence to separate the the music notes from the music sheet, but first let's initialize the output images that we will use for that reason:
-    @snippet samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp init
-
--#  As we specified in the theory in order to extract the object that we desire, we need to create the corresponding structure element. Since here we want to extract the horizontal lines, a corresponding structure element for that purpose will have the following shape:
-    ![](images/linear_horiz.png)
-    and in the source code this is represented by the following code snippet:
-    @snippet samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp horiz
-    ![](images/horiz.png)
-
--#  The same applies for the vertical lines, with the corresponding structure element:
-    ![](images/linear_vert.png)
-    and again this is represented as follows:
-    @snippet samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp vert
-    ![](images/vert.png)
-
--#  As you can see we are almost there. However, at that point you will notice that the edges of the notes are a bit rough. For that reason we need to refine the edges in order to obtain a smoother result:
-    @snippet samples/cpp/tutorial_code/ImgProc/Morphology_3.cpp smooth
-    ![](images/smooth.png)
diff --git a/doc/tutorials/imgproc/morph_lines_detection/morph_lines_detection.md b/doc/tutorials/imgproc/morph_lines_detection/morph_lines_detection.md
new file mode 100644 (file)
index 0000000..4b0d3fa
--- /dev/null
@@ -0,0 +1,194 @@
+Extract horizontal and vertical lines by using morphological operations {#tutorial_morph_lines_detection}
+=============
+
+@prev_tutorial{tutorial_hitOrMiss}
+@next_tutorial{tutorial_pyramids}
+
+Goal
+----
+
+In this tutorial you will learn how to:
+
+-   Apply two very common morphology operators (i.e. Dilation and Erosion), with the creation of custom kernels, in order to extract straight lines on the horizontal and vertical axes. For this purpose, you will use the following OpenCV functions:
+    -   **erode()**
+    -   **dilate()**
+    -   **getStructuringElement()**
+
+    in an example where your goal will be to extract the music notes from a music sheet.
+
+Theory
+------
+
+### Morphology Operations
+Morphology is a set of image processing operations that process images based on predefined *structuring elements*, also known as kernels. The value of each pixel in the output image is based on a comparison of the corresponding pixel in the input image with its neighbors. By choosing the size and shape of the kernel, you can construct a morphological operation that is sensitive to specific shapes in the input image.
+
+Two of the most basic morphological operations are dilation and erosion. Dilation adds pixels to the boundaries of the object in an image, while erosion does exactly the opposite. The number of pixels added or removed, respectively, depends on the size and shape of the structuring element used to process the image. In general, these two operations follow the rules below:
+
+-   __Dilation__: The value of the output pixel is the <b><em>maximum</em></b> value of all the pixels that fall within the structuring element's size and shape. For example in a binary image, if any of the pixels of the input image falling within the range of the kernel is set to the value 1, the corresponding pixel of the output image will be set to 1 as well. The latter applies to any type of image (e.g. grayscale, bgr, etc).
+
+    ![Dilation on a Binary Image](images/morph21.gif)
+
+    ![Dilation on a Grayscale Image](images/morph6.gif)
+
+-   __Erosion__: The reverse applies for the erosion operation. The value of the output pixel is the <b><em>minimum</em></b> value of all the pixels that fall within the structuring element's size and shape. Look at the example figures below:
+
+    ![Erosion on a Binary Image](images/morph211.png)
+
+    ![Erosion on a Grayscale Image](images/morph61.png)
+
+### Structuring Elements
+
+As can be seen above, in any morphological operation the structuring element used to probe the input image is the most important part.
+
+A structuring element is a matrix consisting of only 0's and 1's that can have any arbitrary shape and size. It is typically much smaller than the image being processed, and the pixels with values of 1 define the neighborhood. The center pixel of the structuring element, called the origin, identifies the pixel of interest -- the pixel being processed.
+
+For example, the following illustrates a diamond-shaped structuring element of 7x7 size.
+
+![A Diamond-Shaped Structuring Element and its Origin](images/morph12.gif)
+
+A structuring element can have many common shapes and sizes, such as lines, diamonds, disks, periodic lines, and circles. You typically choose a structuring element of the same size and shape as the objects you want to process/extract in the input image. For example, to find lines in an image, create a linear structuring element as you will see later.
+
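+For example, a minimal C++ sketch of extracting horizontal lines from a binary image (the names and the kernel width are illustrative):
+@code{.cpp}
+// A horizontal linear kernel: width tied to the image width, height 1
+Mat horizontalStructure = getStructuringElement(MORPH_RECT, Size(bw.cols / 30, 1));
+Mat horizontal;
+erode(bw, horizontal, horizontalStructure);           // keep only structures the kernel fits into
+dilate(horizontal, horizontal, horizontalStructure);  // restore their original thickness
+@endcode
+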
+Code
+----
+
+The tutorial code is shown in the lines below.
+
+@add_toggle_cpp
+You can also download it from [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp).
+@include samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp
+@end_toggle
+
+@add_toggle_java
+You can also download it from [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java).
+@include samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java
+@end_toggle
+
+@add_toggle_python
+You can also download it from [here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py).
+@include samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py
+@end_toggle
+
+Explanation / Result
+--------------------
+
+Get image from [here](https://raw.githubusercontent.com/opencv/opencv/master/doc/tutorials/imgproc/morph_lines_detection/images/src.png) .
+
+#### Load Image
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp load_image
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java load_image
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py load_image
+@end_toggle
+
+![](images/src.png)
+
+#### Grayscale
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp gray
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java gray
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py gray
+@end_toggle
+
+![](images/gray.png)
+
+#### Grayscale to Binary image
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp bin
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java bin
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py bin
+@end_toggle
+
+![](images/binary.png)
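+
+The previous version of this tutorial noted the `~` operator here, which takes the inverse (i.e. bitwise_not) of the grayscale image before thresholding. A minimal C++ sketch of this step (the threshold parameters are illustrative):
+@code{.cpp}
+Mat bw;
+adaptiveThreshold(~gray, bw, 255, ADAPTIVE_THRESH_MEAN_C, THRESH_BINARY, 15, -2);
+@endcode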
+
+#### Output images
+
+Now we are ready to apply morphological operations in order to extract the horizontal and vertical lines and, as a consequence, to separate the music notes from the music sheet, but first let's initialize the output images that we will use for that purpose:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp init
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java init
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py init
+@end_toggle
+
+#### Structure elements
+
+As we specified in the theory, in order to extract the object that we desire, we need to create the corresponding structuring element. Since we want to extract the horizontal lines, a corresponding structuring element for that purpose will have the following shape:
+![](images/linear_horiz.png)
+and in the source code this is represented by the following code snippet:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp horiz
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java horiz
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py horiz
+@end_toggle
+
+![](images/horiz.png)
+
+The same applies for the vertical lines, with the corresponding structuring element:
+![](images/linear_vert.png)
+and again this is represented as follows:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp vert
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java vert
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py vert
+@end_toggle
+
+![](images/vert.png)
+
+#### Refine edges / Result
+
+As you can see, we are almost there. However, at this point you will notice that the edges of the notes are a bit rough. For that reason, we need to refine the edges in order to obtain a smoother result:
+
+@add_toggle_cpp
+@snippet samples/cpp/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.cpp smooth
+@end_toggle
+
+@add_toggle_java
+@snippet samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java smooth
+@end_toggle
+
+@add_toggle_python
+@snippet samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py smooth
+@end_toggle
+
+![](images/smooth.png)
index 5210ec0..b832b22 100644 (file)
@@ -1,12 +1,15 @@
 Image Pyramids {#tutorial_pyramids}
 ==============
 
+@prev_tutorial{tutorial_morph_lines_detection}
+@next_tutorial{tutorial_threshold}
+
 Goal
 ----
 
 In this tutorial you will learn how to:
 
--   Use the OpenCV functions @ref cv::pyrUp and @ref cv::pyrDown to downsample or upsample a given
+-   Use the OpenCV functions **pyrUp()** and **pyrDown()** to downsample or upsample a given
     image.
 
 Theory
@@ -19,7 +22,7 @@ Theory
     -#  *Upsize* the image (zoom in) or
     -#  *Downsize* it (zoom out).
 -   Although there is a *geometric transformation* function in OpenCV that -literally- resizes an
-    image (@ref cv::resize , which we will show in a future tutorial), in this section we analyze
+    image (**resize()**, which we will show in a future tutorial), in this section we analyze
     first the use of **Image Pyramids**, which are widely applied in a huge range of vision
     applications.
 
@@ -52,12 +55,12 @@ Theory
     predecessor. Iterating this process on the input image \f$G_{0}\f$ (original image) produces the
     entire pyramid.
 -   The procedure above was useful to downsample an image. What if we want to make it bigger?:
-    columns filled with zeros (\f$0\f$)
-    -   First, upsize the image to twice the original in each dimension, wit the new even rows and
+    -   First, upsize the image to twice the original in each dimension, with the new even rows and
+        columns filled with zeros (\f$0\f$)
     -   Perform a convolution with the same kernel shown above (multiplied by 4) to approximate the
         values of the "missing pixels"
 -   These two procedures (downsampling and upsampling as explained above) are implemented by the
-    OpenCV functions @ref cv::pyrUp and @ref cv::pyrDown , as we will see in an example with the
+    OpenCV functions **pyrUp()** and **pyrDown()**, as we will see in an example with the
     code below:
 
 @note When we reduce the size of an image, we are actually *losing* information of the image.
@@ -65,76 +68,134 @@ Theory
 Code
 ----
 
-This tutorial code's is shown lines below. You can also download it from
-[here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp)
+The tutorial code is shown in the lines below.
+
+@add_toggle_cpp
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp)
+@include samples/cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp
+@end_toggle
+
+@add_toggle_java
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/java/tutorial_code/ImgProc/Pyramids/Pyramids.java)
+@include samples/java/tutorial_code/ImgProc/Pyramids/Pyramids.java
+@end_toggle
 
-@include samples/cpp/tutorial_code/ImgProc/Pyramids.cpp
+@add_toggle_python
+You can also download it from
+[here](https://raw.githubusercontent.com/opencv/opencv/master/samples/python/tutorial_code/imgProc/Pyramids/pyramids.py)
+@include samples/python/tutorial_code/imgProc/Pyramids/pyramids.py
+@end_toggle
 
 Explanation
 -----------
 
 Let's check the general structure of the program:
 
--   Load an image (in this case it is defined in the program, the user does not have to enter it
-    as an argument)
-    @snippet cpp/tutorial_code/ImgProc/Pyramids.cpp load
+#### Load an image
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp load
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgProc/Pyramids/Pyramids.java load
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/imgProc/Pyramids/pyramids.py load
+@end_toggle
+
+#### Create window
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp show_image
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgProc/Pyramids/Pyramids.java show_image
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/imgProc/Pyramids/pyramids.py show_image
+@end_toggle
 
--   Create a Mat object to store the result of the operations (*dst*) and one to save temporal
-    results (*tmp*).
-    @code{.cpp}
-    Mat src, dst, tmp;
-    /* ... */
-    tmp = src;
-    dst = tmp;
-    @endcode
+#### Loop
 
--   Create a window to display the result
-    @snippet cpp/tutorial_code/ImgProc/Pyramids.cpp create_window
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp loop
+@end_toggle
 
--   Perform an infinite loop waiting for user input.
-    @snippet cpp/tutorial_code/ImgProc/Pyramids.cpp infinite_loop
+@add_toggle_java
+@snippet java/tutorial_code/ImgProc/Pyramids/Pyramids.java loop
+@end_toggle
 
-    Our program exits if the user presses *ESC*. Besides, it has two options:
+@add_toggle_python
+@snippet python/tutorial_code/imgProc/Pyramids/pyramids.py loop
+@end_toggle
 
-    -   **Perform upsampling (after pressing 'u')**
-        @snippet cpp/tutorial_code/ImgProc/Pyramids.cpp pyrup
-        We use the function @ref cv::pyrUp with three arguments:
+The program performs an infinite loop waiting for user input.
+It exits if the user presses **ESC**. In addition, it offers two options:
 
-        -   *tmp*: The current image, it is initialized with the *src* original image.
-        -   *dst*: The destination image (to be shown on screen, supposedly the double of the
+-   **Perform upsampling - Zoom 'i'n (after pressing 'i')**
+
+    We use the function **pyrUp()** with three arguments:
+        -   *src*: The current and destination image (to be shown on screen, supposedly double the size of the
             input image)
-        -   *Size( tmp.cols*2, tmp.rows\*2 )\* : The destination size. Since we are upsampling,
-            @ref cv::pyrUp expects a size double than the input image (in this case *tmp*).
-    -   **Perform downsampling (after pressing 'd')**
-        @snippet cpp/tutorial_code/ImgProc/Pyramids.cpp pyrdown
-        Similarly as with @ref cv::pyrUp , we use the function @ref cv::pyrDown with three arguments:
-
-        -   *tmp*: The current image, it is initialized with the *src* original image.
-        -   *dst*: The destination image (to be shown on screen, supposedly half the input
-            image)
-        -   *Size( tmp.cols/2, tmp.rows/2 )* : The destination size. Since we are upsampling,
-            @ref cv::pyrDown expects half the size the input image (in this case *tmp*).
-    -   Notice that it is important that the input image can be divided by a factor of two (in
-        both dimensions). Otherwise, an error will be shown.
-    -   Finally, we update the input image **tmp** with the current image displayed, so the
-        subsequent operations are performed on it.
-        @snippet cpp/tutorial_code/ImgProc/Pyramids.cpp update_tmp
+        -   *Size( src.cols\*2, src.rows\*2 )*: The destination size. Since we are upsampling,
+            **pyrUp()** expects a size double that of the input image (in this case *src*).
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp pyrup
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgProc/Pyramids/Pyramids.java pyrup
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/imgProc/Pyramids/pyramids.py pyrup
+@end_toggle
+
+-   **Perform downsampling - Zoom 'o'ut (after pressing 'o')**
+
+    We use the function **pyrDown()** with three arguments (similarly to **pyrUp()**):
+        -   *src*: The current and destination image (to be shown on screen, supposedly half the input
+            image)
+        -   *Size( src.cols/2, src.rows/2 )*: The destination size. Since we are downsampling,
+            **pyrDown()** expects half the size of the input image (in this case *src*).
+
+@add_toggle_cpp
+@snippet cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp pyrdown
+@end_toggle
+
+@add_toggle_java
+@snippet java/tutorial_code/ImgProc/Pyramids/Pyramids.java pyrdown
+@end_toggle
+
+@add_toggle_python
+@snippet python/tutorial_code/imgProc/Pyramids/pyramids.py pyrdown
+@end_toggle
+
+Notice that it is important that the input image can be divided by a factor of two (in both dimensions).
+Otherwise, an error will be shown.
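+
+For reference, the two operations boil down to calls like these (a sketch, assuming the image is kept in *src* as in the sample):
+@code{.cpp}
+pyrUp( src, src, Size( src.cols*2, src.rows*2 ) );    // zoom in: double each dimension
+pyrDown( src, src, Size( src.cols/2, src.rows/2 ) );  // zoom out: halve each dimension
+@endcode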
 
 Results
 -------
 
--   After compiling the code above we can test it. The program calls an image **chicky_512.jpg**
-    that comes in the *samples/data* folder. Notice that this image is \f$512 \times 512\f$,
+-   By default, the program uses the image [chicky_512.png](https://raw.githubusercontent.com/opencv/opencv/master/samples/data/chicky_512.png)
+    that comes in the `samples/data` folder. Notice that this image is \f$512 \times 512\f$,
     hence a downsample won't generate any error (\f$512 = 2^{9}\f$). The original image is shown below:
 
     ![](images/Pyramids_Tutorial_Original_Image.jpg)
 
--   First we apply two successive @ref cv::pyrDown operations by pressing 'd'. Our output is:
+-   First we apply two successive **pyrDown()** operations by pressing 'o'. Our output is:
 
     ![](images/Pyramids_Tutorial_PyrDown_Result.jpg)
 
 -   Note that we should have lost some resolution due to the fact that we are diminishing the size
-    of the image. This is evident after we apply @ref cv::pyrUp twice (by pressing 'u'). Our output
+    of the image. This is evident after we apply **pyrUp()** twice (by pressing 'i'). Our output
     is now:
 
     ![](images/Pyramids_Tutorial_PyrUp_Result.jpg)
index 42dd4ce..6b339e3 100644 (file)
@@ -5,6 +5,8 @@ In this section you will learn about the image processing (manipulation) functio
 
 -   @subpage tutorial_gausian_median_blur_bilateral_filter
 
+    *Languages:* C++, Java, Python
+
     *Compatibility:* \> OpenCV 2.0
 
     *Author:* Ana Huamán
@@ -27,7 +29,9 @@ In this section you will learn about the image processing (manipulation) functio
 
     Here we investigate different morphology operators
 
--      @subpage tutorial_hitOrMiss
+-   @subpage tutorial_hitOrMiss
+
+    *Languages:* C++, Java, Python
 
     *Compatibility:* \> OpenCV 2.4
 
@@ -35,7 +39,9 @@ In this section you will learn about the image processing (manipulation) functio
 
     Learn how to find patterns in binary images using the Hit-or-Miss operation
 
--   @subpage tutorial_moprh_lines_detection
+-   @subpage tutorial_morph_lines_detection
+
+    *Languages:* C++, Java, Python
 
     *Compatibility:* \> OpenCV 2.0
 
@@ -45,6 +51,8 @@ In this section you will learn about the image processing (manipulation) functio
 
 -   @subpage tutorial_pyramids
 
+    *Languages:* C++, Java, Python
+
     *Compatibility:* \> OpenCV 2.0
 
     *Author:* Ana Huamán
@@ -69,6 +77,8 @@ In this section you will learn about the image processing (manipulation) functio
 
 -   @subpage tutorial_filter_2d
 
+    *Languages:* C++, Java, Python
+
     *Compatibility:* \> OpenCV 2.0
 
     *Author:* Ana Huamán
@@ -77,6 +87,8 @@ In this section you will learn about the image processing (manipulation) functio
 
 -   @subpage tutorial_copyMakeBorder
 
+    *Languages:* C++, Java, Python
+
     *Compatibility:* \> OpenCV 2.0
 
     *Author:* Ana Huamán
@@ -85,6 +97,8 @@ In this section you will learn about the image processing (manipulation) functio
 
 -   @subpage tutorial_sobel_derivatives
 
+    *Languages:* C++, Java, Python
+
     *Compatibility:* \> OpenCV 2.0
 
     *Author:* Ana Huamán
@@ -93,6 +107,8 @@ In this section you will learn about the image processing (manipulation) functio
 
 -   @subpage tutorial_laplace_operator
 
+    *Languages:* C++, Java, Python
+
     *Compatibility:* \> OpenCV 2.0
 
     *Author:* Ana Huamán
@@ -109,6 +125,8 @@ In this section you will learn about the image processing (manipulation) functio
 
 -   @subpage tutorial_hough_lines
 
+    *Languages:* C++, Java, Python
+
     *Compatibility:* \> OpenCV 2.0
 
     *Author:* Ana Huamán
@@ -117,6 +135,8 @@ In this section you will learn about the image processing (manipulation) functio
 
 -   @subpage tutorial_hough_circle
 
+    *Languages:* C++, Java, Python
+
     *Compatibility:* \> OpenCV 2.0
 
     *Author:* Ana Huamán
diff --git a/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown b/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown
new file mode 100644 (file)
index 0000000..acc788d
--- /dev/null
@@ -0,0 +1,61 @@
+Cross referencing OpenCV from other Doxygen projects {#tutorial_cross_referencing}
+====================================================
+
+Cross referencing OpenCV
+------------------------
+
+[Doxygen](https://www.stack.nl/~dimitri/doxygen/) is a tool to generate
+documentation like the OpenCV documentation you are reading right now.
+It is used by a variety of software projects. If you happen to use it
+to generate your own documentation, and you are using OpenCV inside your
+project, this short tutorial is for you.
+
+Imagine this warning inside your documentation code:
+
+@code
+/**
+ * @warning This function returns a cv::Mat.
+ */
+@endcode
+
+Inside your generated documentation this warning will look roughly like this:
+
+@warning This function returns a %cv::Mat.
+
+While inside the OpenCV documentation the `%cv::Mat` is rendered as a link:
+
+@warning This function returns a cv::Mat.
+
+To generate links to the OpenCV documentation inside your project, you only
+have to perform two small steps. First download the file
+[opencv.tag](opencv.tag) (right-click and choose "save as...") and place it
+somewhere in your project directory, for example as
+`docs/doxygen-tags/opencv.tag`.
+
+Open your Doxyfile using your favorite text editor and search for the key
+`TAGFILES`. Change it as follows:
+
+@code
+TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.3.0
+@endcode
+
+If you had other definitions already, you can append the line using a `\`:
+
+@code
+TAGFILES = ./docs/doxygen-tags/libstdc++.tag=https://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen \
+           ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.3.0
+@endcode
+
+Doxygen can now use the information from the tag file to link to the OpenCV
+documentation. Rebuild your documentation right now!
+
+@note To allow others to also use a *.tag file to link to your documentation,
+set `GENERATE_TAGFILE = html/your_project.tag`. Your documentation will now
+contain a `your_project.tag` file in its root directory.
+
+
+References
+----------
+
+- [Doxygen: Linking to external documentation](https://www.stack.nl/~dimitri/doxygen/manual/external.html)
+- [opencv.tag](opencv.tag)
index 37820a8..ebd2793 100644 (file)
@@ -171,3 +171,11 @@ Additionally you can find very basic sample source code to introduce you to the
     _Author:_ Maksim Shabunin
 
     This document describes some aspects of 2.4 -> 3.0 transition process.
+
+-   @subpage tutorial_cross_referencing
+
+    _Compatibility:_ \> OpenCV 3.3.0
+
+    _Author:_ Sebastian Höffner
+
+    This document outlines how to create cross references to the OpenCV documentation from other Doxygen projects.
diff --git a/doc/tutorials/viz/histo3D/histo3D.markdown b/doc/tutorials/viz/histo3D/histo3D.markdown
new file mode 100644 (file)
index 0000000..fdf174c
--- /dev/null
@@ -0,0 +1,51 @@
+Creating a 3D histogram {#tutorial_histo3D}
+================
+
+Goal
+----
+
+In this tutorial you will learn how to:
+
+-   Create your own callback keyboard function for viz window.
+-   Show your 3D histogram in a viz window.
+
+Code
+----
+
+You can download the code from [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/viz/histo3D.cpp).
+@include samples/cpp/tutorial_code/viz/histo3D.cpp
+
+Explanation
+-----------
+
+Here is the general structure of the program:
+
+-   You can give the full path to an image on the command line
+    @snippet histo3D.cpp command_line_parser
+
+    or, without a path, a synthetic image is generated, with pixel values drawn from Gaussian distributions (@ref cv::RNG::fill) centered at (60+/-10,40+/-5,50+/-20) in the first quadrant,
+    (160+/-20,10+/-5,50+/-10) in the second quadrant, (90+/-10,100+/-20,50+/-20) in the third quadrant, and (100+/-10,10+/-5,150+/-40) in the last quadrant.
+    @snippet histo3D.cpp synthetic_image
+    The tridimensional histogram of the image is calculated using @ref cv::calcHist and normalized between 0 and 100 with @ref cv::normalize.
+    @snippet histo3D.cpp calchist_for_histo3d
+    The channels are 2, 1 and 0, to synchronize the colors with the Viz axis colors of the object cv::viz::WCoordinateSystem.
+
+    A trackbar is inserted in the image window. Its initial value is 90, which means that only histogram cells greater than 9/100000.0 (23 pixels for a 512x512 image) will be displayed.
+    @snippet histo3D.cpp slide_bar_for_thresh
+    We are ready to open a viz window with a callback function to capture keyboard events in the viz window. Using @ref cv::viz::Viz3d::spinOnce enables keyboard events to be captured in the @ref cv::imshow window too.
+    @snippet histo3D.cpp manage_viz_imshow_window
+    The function DrawHistogram3D processes the histogram Mat to display it in a Viz window. The number of planes, rows and columns of a [three-dimensional Mat](@ref CVMat_Details) can be found using this code:
+    @snippet histo3D.cpp get_cube_size
+    To get the histogram value at a specific location we use the @ref cv::Mat::at(int i0,int i1, int i2) method with three arguments k, i and j, where k is the plane number, i the row number and j the column number.
+    @snippet histo3D.cpp get_cube_values
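+
+    For example, a minimal sketch (assuming the histogram Mat is of type CV_32F):
+    @code{.cpp}
+    float count = histogram.at<float>(k, i, j); // plane k, row i, column j
+    @endcode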
+
+-   Callback function
+    The principle is the same as for a mouse callback function. The code of the pressed key is in the field code of the class @ref cv::viz::KeyboardEvent.
+    @snippet histo3D.cpp viz_keyboard_callback
+
+Results
+-------
+
+Here is the result of the program with no arguments and a threshold equal to 50.
+
+![](images/histo50.png)
diff --git a/doc/tutorials/viz/histo3D/images/histo50.png b/doc/tutorials/viz/histo3D/images/histo50.png
new file mode 100644 (file)
index 0000000..b2b4416
Binary files /dev/null and b/doc/tutorials/viz/histo3D/images/histo50.png differ
index db20f1e..fae1396 100644 (file)
@@ -32,3 +32,11 @@ OpenCV Viz {#tutorial_table_of_content_viz}
     *Author:* Ozan Tonkal
 
     You will learn how to create your own widgets.
+
+-   @subpage tutorial_histo3D
+
+    *Compatibility:* \> OpenCV 3.0.0
+
+    *Author:* Laurent Berger
+
+    You will learn how to plot a 3D histogram.
index 343ca95..e3783c0 100644 (file)
@@ -783,6 +783,14 @@ struct CV_EXPORTS_W_SIMPLE CirclesGridFinderParameters
     GridType gridType;
 };
 
+struct CV_EXPORTS_W_SIMPLE CirclesGridFinderParameters2 : public CirclesGridFinderParameters
+{
+    CV_WRAP CirclesGridFinderParameters2();
+
+    CV_PROP_RW float squareSize; //!< Distance between two adjacent points. Used by CALIB_CB_CLUSTERING.
+    CV_PROP_RW float maxRectifiedDistance; //!< Max deviation from prediction. Used by CALIB_CB_CLUSTERING.
+};
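+
+/* A minimal usage sketch (illustrative only; the grid size and flags are assumptions):
+
+       CirclesGridFinderParameters2 params;  // squareSize = 1.0f, maxRectifiedDistance = squareSize/2 by default
+       std::vector<cv::Point2f> centers;
+       bool found = cv::findCirclesGrid2(image, cv::Size(4, 11), centers,
+                                         cv::CALIB_CB_ASYMMETRIC_GRID | cv::CALIB_CB_CLUSTERING,
+                                         cv::SimpleBlobDetector::create(), params);
+*/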
+
 /** @brief Finds centers in the grid of circles.
 
 @param image grid view of input circles; it must be an 8-bit grayscale or color image.
@@ -821,6 +829,12 @@ CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize,
                                    CirclesGridFinderParameters parameters);
 
 /** @overload */
+CV_EXPORTS_W bool findCirclesGrid2( InputArray image, Size patternSize,
+                                   OutputArray centers, int flags,
+                                   const Ptr<FeatureDetector> &blobDetector,
+                                   CirclesGridFinderParameters2 parameters);
+
+/** @overload */
 CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize,
                                    OutputArray centers, int flags = CALIB_CB_SYMMETRIC_GRID,
                                    const Ptr<FeatureDetector> &blobDetector = SimpleBlobDetector::create());
index 0e35196..4c2d9d2 100644 (file)
@@ -1,6 +1,7 @@
 {
     "class_ignore_list": [
-        "CirclesGridFinderParameters"
+        "CirclesGridFinderParameters",
+        "CirclesGridFinderParameters2"
     ],
     "missing_consts" : {
         "Calib3d": {
index c93415c..ad08f3e 100644 (file)
@@ -2094,9 +2094,19 @@ void cv::drawChessboardCorners( InputOutputArray _image, Size patternSize,
                              nelems, patternWasFound );
 }
 
-bool cv::findCirclesGrid( InputArray _image, Size patternSize,
+bool cv::findCirclesGrid( InputArray image, Size patternSize,
+                                   OutputArray centers, int flags,
+                                   const Ptr<FeatureDetector> &blobDetector,
+                                   CirclesGridFinderParameters parameters)
+{
+    CirclesGridFinderParameters2 parameters2;
+    *((CirclesGridFinderParameters*)&parameters2) = parameters;
+    return cv::findCirclesGrid2(image, patternSize, centers, flags, blobDetector, parameters2);
+}
+
+bool cv::findCirclesGrid2( InputArray _image, Size patternSize,
                           OutputArray _centers, int flags, const Ptr<FeatureDetector> &blobDetector,
-                          CirclesGridFinderParameters parameters)
+                          CirclesGridFinderParameters2 parameters)
 {
     CV_INSTRUMENT_REGION()
 
@@ -2115,19 +2125,19 @@ bool cv::findCirclesGrid( InputArray _image, Size patternSize,
       points.push_back (keypoints[i].pt);
     }
 
+    if(flags & CALIB_CB_ASYMMETRIC_GRID)
+      parameters.gridType = CirclesGridFinderParameters::ASYMMETRIC_GRID;
+    if(flags & CALIB_CB_SYMMETRIC_GRID)
+      parameters.gridType = CirclesGridFinderParameters::SYMMETRIC_GRID;
+
     if(flags & CALIB_CB_CLUSTERING)
     {
-      CirclesGridClusterFinder circlesGridClusterFinder(isAsymmetricGrid);
+      CirclesGridClusterFinder circlesGridClusterFinder(parameters);
       circlesGridClusterFinder.findGrid(points, patternSize, centers);
       Mat(centers).copyTo(_centers);
       return !centers.empty();
     }
 
-    if(flags & CALIB_CB_ASYMMETRIC_GRID)
-      parameters.gridType = CirclesGridFinderParameters::ASYMMETRIC_GRID;
-    if(flags & CALIB_CB_SYMMETRIC_GRID)
-      parameters.gridType = CirclesGridFinderParameters::SYMMETRIC_GRID;
-
     const int attempts = 2;
     const size_t minHomographyPoints = 4;
     Mat H;
@@ -2191,7 +2201,7 @@ bool cv::findCirclesGrid( InputArray _image, Size patternSize,
 bool cv::findCirclesGrid( InputArray _image, Size patternSize,
                           OutputArray _centers, int flags, const Ptr<FeatureDetector> &blobDetector)
 {
-    return cv::findCirclesGrid(_image, patternSize, _centers, flags, blobDetector, CirclesGridFinderParameters());
+    return cv::findCirclesGrid2(_image, patternSize, _centers, flags, blobDetector, CirclesGridFinderParameters2());
 }
 
 /* End of file. */
index 4cf0779..b55101a 100644 (file)
@@ -559,6 +559,13 @@ CirclesGridFinderParameters::CirclesGridFinderParameters()
   gridType = SYMMETRIC_GRID;
 }
 
+CirclesGridFinderParameters2::CirclesGridFinderParameters2()
+: CirclesGridFinderParameters()
+{
+    squareSize = 1.0f;
+    maxRectifiedDistance = squareSize/2.0f;
+}
+
 CirclesGridFinder::CirclesGridFinder(Size _patternSize, const std::vector<Point2f> &testKeypoints,
                                      const CirclesGridFinderParameters &_parameters) :
   patternSize(static_cast<size_t> (_patternSize.width), static_cast<size_t> (_patternSize.height))
index 8f55f6c..ad0f916 100644 (file)
@@ -56,11 +56,11 @@ class CirclesGridClusterFinder
     CirclesGridClusterFinder& operator=(const CirclesGridClusterFinder&);
     CirclesGridClusterFinder(const CirclesGridClusterFinder&);
 public:
-  CirclesGridClusterFinder(bool _isAsymmetricGrid)
+  CirclesGridClusterFinder(const cv::CirclesGridFinderParameters2 &parameters)
   {
-    isAsymmetricGrid = _isAsymmetricGrid;
-    squareSize = 1.0f;
-    maxRectifiedDistance = (float)(squareSize / 2.0);
+    isAsymmetricGrid = parameters.gridType == cv::CirclesGridFinderParameters::ASYMMETRIC_GRID;
+    squareSize = parameters.squareSize;
+    maxRectifiedDistance = parameters.maxRectifiedDistance;
   }
   void findGrid(const std::vector<cv::Point2f> &points, cv::Size patternSize, std::vector<cv::Point2f>& centers);
 
index 13b998a..86d74b2 100644 (file)
@@ -928,7 +928,7 @@ double cv::fisheye::stereoCalibrate(InputArrayOfArrays objectPoints, InputArrayO
     intrinsicRight_errors.isEstimate = intrinsicRight.isEstimate;
 
     std::vector<uchar> selectedParams;
-    std::vector<int> tmp(6 * (n_images + 1), 1);
+    std::vector<uchar> tmp(6 * (n_images + 1), 1);
     selectedParams.insert(selectedParams.end(), intrinsicLeft.isEstimate.begin(), intrinsicLeft.isEstimate.end());
     selectedParams.insert(selectedParams.end(), intrinsicRight.isEstimate.begin(), intrinsicRight.isEstimate.end());
     selectedParams.insert(selectedParams.end(), tmp.begin(), tmp.end());
index 26fefcb..a9ed1f1 100644 (file)
@@ -5,7 +5,7 @@ ocv_add_dispatched_file(stat SSE4_2 AVX2)
 
 ocv_add_module(core
                OPTIONAL opencv_cudev
-               WRAP java python)
+               WRAP java python js)
 
 set(extra_libs "")
 
index 880c04c..8054d31 100644 (file)
@@ -1913,8 +1913,9 @@ matrix src:
 @code
     src*eigenvectors.row(i).t() = eigenvalues.at<srcType>(i)*eigenvectors.row(i).t()
 @endcode
-@note in the new and the old interfaces different ordering of eigenvalues and eigenvectors
-parameters is used.
+
+@note Use cv::eigenNonSymmetric for calculation of real eigenvalues and eigenvectors of non-symmetric matrix.
+
 @param src input matrix that must have CV_32FC1 or CV_64FC1 type, square size and be symmetrical
 (src ^T^ == src).
 @param eigenvalues output vector of eigenvalues of the same type as src; the eigenvalues are stored
@@ -1922,11 +1923,28 @@ in the descending order.
 @param eigenvectors output matrix of eigenvectors; it has the same size and type as src; the
 eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding
 eigenvalues.
-@sa completeSymm , PCA
+@sa eigenNonSymmetric, completeSymm , PCA
 */
 CV_EXPORTS_W bool eigen(InputArray src, OutputArray eigenvalues,
                         OutputArray eigenvectors = noArray());
 
+/** @brief Calculates eigenvalues and eigenvectors of a non-symmetric matrix (real eigenvalues only).
+
+@note Assumes real eigenvalues.
+
+The function calculates eigenvalues and eigenvectors (optional) of the square matrix src:
+@code
+    src*eigenvectors.row(i).t() = eigenvalues.at<srcType>(i)*eigenvectors.row(i).t()
+@endcode
+
+@param src input matrix (CV_32FC1 or CV_64FC1 type).
+@param eigenvalues output vector of eigenvalues (same type as src).
+@param eigenvectors output matrix of eigenvectors (same size and type as src). The eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding eigenvalues.
+@sa eigen
+*/
+CV_EXPORTS_W void eigenNonSymmetric(InputArray src, OutputArray eigenvalues,
+                                    OutputArray eigenvectors);
+
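A minimal sketch of calling the new function (illustrative values; the input must have real eigenvalues, e.g. a triangular matrix):

    cv::Mat A = (cv::Mat_<double>(2, 2) << 2, 1,
                                           0, 3); // real eigenvalues: 2 and 3
    cv::Mat evals, evecs;
    cv::eigenNonSymmetric(A, evals, evecs);
    // each row i of evecs satisfies A * evecs.row(i).t() == evals.at<double>(i) * evecs.row(i).t()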
 /** @brief Calculates the covariance matrix of a set of vectors.
 
 The function cv::calcCovarMatrix calculates the covariance matrix and, optionally, the mean vector of
index 62d767f..113954d 100644 (file)
@@ -740,5 +740,6 @@ CV_EXPORTS_W void setUseIPP_NE(bool flag);
 } // cv
 
 #include "opencv2/core/neon_utils.hpp"
+#include "opencv2/core/vsx_utils.hpp"
 
 #endif //OPENCV_CORE_BASE_HPP
index 5de35bc..5261a41 100644 (file)
 #  include <arm_neon.h>
 #endif
 
+#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+#  include <altivec.h>
+#  undef vector
+#  undef pixel
+#  undef bool
+#  define CV_VSX 1
+#endif
+
 #endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
 
 #if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
@@ -135,6 +143,12 @@ struct VZeroUpperGuard {
 #elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
 #  include <arm_neon.h>
 #  define CV_NEON 1
+#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+#  include <altivec.h>
+#  undef vector
+#  undef pixel
+#  undef bool
+#  define CV_VSX 1
 #endif
 
 #endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
@@ -208,3 +222,7 @@ struct VZeroUpperGuard {
 #ifndef CV_NEON
 #  define CV_NEON 0
 #endif
+
+#ifndef CV_VSX
+#  define CV_VSX 0
+#endif
index 6eaed9e..66a473f 100644 (file)
 #endif
 #define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...)  CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
 
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
+#  define CV_TRY_VSX 1
+#  define CV_CPU_HAS_SUPPORT_VSX 1
+#  define CV_CPU_CALL_VSX(fn, args) return (opt_VSX::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX
+#  define CV_TRY_VSX 1
+#  define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
+#  define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
+#else
+#  define CV_TRY_VSX 0
+#  define CV_CPU_HAS_SUPPORT_VSX 0
+#  define CV_CPU_CALL_VSX(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...)  CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
 #define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
 #define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...)  CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
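A hypothetical sketch of how the new VSX link is consumed (myFunc_impl is an invented name, assumed to be compiled into both the opt_VSX and cpu_baseline namespaces by the dispatch machinery):

    void myFunc(const float* src, float* dst, int n)
    {
        CV_CPU_CALL_VSX(myFunc_impl, (src, dst, n));      // returns here when VSX is usable
        CV_CPU_CALL_BASELINE(myFunc_impl, (src, dst, n)); // otherwise falls through to baseline
    }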
index 70bbf93..b513b44 100644 (file)
@@ -153,6 +153,8 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
 
 #define CV_CPU_NEON   100
 
+#define CV_CPU_VSX 200
+
 // when adding to this list remember to update the following enum
 #define CV_HARDWARE_MAX_FEATURE 255
 
@@ -182,7 +184,9 @@ enum CpuFeatures {
     CPU_AVX_512VBMI     = 20,
     CPU_AVX_512VL       = 21,
 
-    CPU_NEON            = 100
+    CPU_NEON            = 100,
+
+    CPU_VSX             = 200
 };
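The new constant plugs into the existing runtime feature query; a minimal sketch:

    #include <opencv2/core.hpp>
    #include <cstdio>

    int main()
    {
        if (cv::checkHardwareSupport(CV_CPU_VSX))
            std::printf("VSX vectorization available\n");
        return 0;
    }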
 
 
index d6dedc1..26fd918 100644 (file)
@@ -308,6 +308,7 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 #ifdef CV_DOXYGEN
 #   undef CV_SSE2
 #   undef CV_NEON
+#   undef CV_VSX
 #endif
 
 #if CV_SSE2
@@ -318,6 +319,10 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 
 #include "opencv2/core/hal/intrin_neon.hpp"
 
+#elif CV_VSX
+
+#include "opencv2/core/hal/intrin_vsx.hpp"
+
 #else
 
 #include "opencv2/core/hal/intrin_cpp.hpp"
diff --git a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
new file mode 100644 (file)
index 0000000..3d15945
--- /dev/null
@@ -0,0 +1,927 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_VSX_HPP
+#define OPENCV_HAL_VSX_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+
+/**
+ * todo: support half precision on POWER9
+ * (conversion instructions xvcvhpsp, xvcvsphp)
+**/
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+///////// Types ////////////
+
+struct v_uint8x16
+{
+    typedef uchar lane_type;
+    enum { nlanes = 16 };
+    vec_uchar16 val;
+
+    explicit v_uint8x16(const vec_uchar16& v) : val(v)
+    {}
+    v_uint8x16() : val(vec_uchar16_z)
+    {}
+    v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v))
+    {}
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+        : val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
+    {}
+    uchar get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_int8x16
+{
+    typedef schar lane_type;
+    enum { nlanes = 16 };
+    vec_char16 val;
+
+    explicit v_int8x16(const vec_char16& v) : val(v)
+    {}
+    v_int8x16() : val(vec_char16_z)
+    {}
+    v_int8x16(vec_bchar16 v) : val(vec_char16_c(v))
+    {}
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+        : val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
+    {}
+    schar get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_uint16x8
+{
+    typedef ushort lane_type;
+    enum { nlanes = 8 };
+    vec_ushort8 val;
+
+    explicit v_uint16x8(const vec_ushort8& v) : val(v)
+    {}
+    v_uint16x8() : val(vec_ushort8_z)
+    {}
+    v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v))
+    {}
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+        : val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7))
+    {}
+    ushort get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_int16x8
+{
+    typedef short lane_type;
+    enum { nlanes = 8 };
+    vec_short8 val;
+
+    explicit v_int16x8(const vec_short8& v) : val(v)
+    {}
+    v_int16x8() : val(vec_short8_z)
+    {}
+    v_int16x8(vec_bshort8 v) : val(vec_short8_c(v))
+    {}
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+        : val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7))
+    {}
+    short get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_uint32x4
+{
+    typedef unsigned lane_type;
+    enum { nlanes = 4 };
+    vec_uint4 val;
+
+    explicit v_uint32x4(const vec_uint4& v) : val(v)
+    {}
+    v_uint32x4() : val(vec_uint4_z)
+    {}
+    v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v))
+    {}
+    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3))
+    {}
+    uint get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_int32x4
+{
+    typedef int lane_type;
+    enum { nlanes = 4 };
+    vec_int4 val;
+
+    explicit v_int32x4(const vec_int4& v) : val(v)
+    {}
+    v_int32x4() : val(vec_int4_z)
+    {}
+    v_int32x4(vec_bint4 v) : val(vec_int4_c(v))
+    {}
+    v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3))
+    {}
+    int get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_float32x4
+{
+    typedef float lane_type;
+    enum { nlanes = 4 };
+    vec_float4 val;
+
+    explicit v_float32x4(const vec_float4& v) : val(v)
+    {}
+    v_float32x4() : val(vec_float4_z)
+    {}
+    v_float32x4(vec_bint4 v) : val(vec_float4_c(v))
+    {}
+    v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3))
+    {}
+    float get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_uint64x2
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 2 };
+    vec_udword2 val;
+
+    explicit v_uint64x2(const vec_udword2& v) : val(v)
+    {}
+    v_uint64x2() : val(vec_udword2_z)
+    {}
+    v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v))
+    {}
+    v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1))
+    {}
+    uint64 get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_int64x2
+{
+    typedef int64 lane_type;
+    enum { nlanes = 2 };
+    vec_dword2 val;
+
+    explicit v_int64x2(const vec_dword2& v) : val(v)
+    {}
+    v_int64x2() : val(vec_dword2_z)
+    {}
+    v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v))
+    {}
+    v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1))
+    {}
+    int64 get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_float64x2
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+    vec_double2 val;
+
+    explicit v_float64x2(const vec_double2& v) : val(v)
+    {}
+    v_float64x2() : val(vec_double2_z)
+    {}
+    v_float64x2(vec_bdword2 v) : val(vec_double2_c(v))
+    {}
+    v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1))
+    {}
+    double get0() const
+    { return vec_extract(val, 0); }
+};
+
+//////////////// Load and store operations ///////////////
+
+/*
+ * clang-5 aborts while parsing "vec_xxx_c" only when it appears
+ * inside a function template defined by a preprocessor macro.
+ *
+ * if vec_xxx_c is defined as a C++ cast instead, clang-5 accepts it
+*/
+#define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast)                        \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(); }                               \
+inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));}          \
+template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a)  \
+{ return _Tpvec((cast)a.val); }
+
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int8x16, schar, s8, vec_char16)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint16x8, ushort, u16, vec_ushort8)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int16x8, short, s16, vec_short8)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint32x4, uint, u32, vec_uint4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int32x4, int, s32, vec_int4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint64x2, uint64, u64, vec_udword2)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
+
+#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \
+inline _Tpvec v_load(const _Tp* ptr)                                        \
+{ return _Tpvec(ld_func(0, ptr)); }                                         \
+inline _Tpvec v_load_aligned(const _Tp* ptr)                                \
+{ return _Tpvec(ld_func(0, ptr)); }                                         \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1)               \
+{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); }          \
+inline void v_store(_Tp* ptr, const _Tpvec& a)                              \
+{ st_func(a.val, 0, ptr); }                                                 \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a)                      \
+{ st_func(a.val, 0, ptr); }                                                 \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a)                          \
+{ vec_st_l8(a.val, ptr); }                                                  \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a)                         \
+{ vec_st_h8(a.val, ptr); }
+
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2)
+
+//////////////// Value reordering ///////////////
+
+/* de&interleave */
+#define OPENCV_HAL_IMPL_VSX_INTERLEAVE(_Tp, _Tpvec)                          \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b)        \
+{ vec_ld_deinterleave(ptr, a.val, b.val);}                                   \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a,                   \
+                                _Tpvec& b, _Tpvec& c)                        \
+{ vec_ld_deinterleave(ptr, a.val, b.val, c.val); }                           \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b,        \
+                                                _Tpvec& c, _Tpvec& d)        \
+{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); }                    \
+inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b)   \
+{ vec_st_interleave(a.val, b.val, ptr); }                                    \
+inline void v_store_interleave(_Tp* ptr, const _Tpvec& a,                    \
+                               const _Tpvec& b, const _Tpvec& c)             \
+{ vec_st_interleave(a.val, b.val, c.val, ptr); }                             \
+inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b,   \
+                                         const _Tpvec& c, const _Tpvec& d)   \
+{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); }
+
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(schar, v_int8x16)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(ushort, v_uint16x8)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(short, v_int16x8)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2)
+
+/* Expand */
+#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh)  \
+inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1)   \
+{                                                                 \
+    b0.val = fh(a.val);                                           \
+    b1.val = fl(a.val);                                           \
+}                                                                 \
+inline _Tpwvec v_load_expand(const _Tp* ptr)                      \
+{ return _Tpwvec(fh(vsx_ld(0, ptr))); }
+
+OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_uint16x8, v_uint32x4, ushort, vec_unpacklu, vec_unpackhu)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_int16x8, v_int32x4, short, vec_unpackl, vec_unpackh)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr)
+{ return v_uint32x4(vec_ld_buw(ptr)); }
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{ return v_int32x4(vec_ld_bsw(ptr)); }
+
+/* pack */
+#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack)    \
+inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b)                                          \
+{                                                                                                   \
+    return _Tpvec(pkfnc(a.val, b.val));                                                             \
+}                                                                                                   \
+inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a)                                            \
+{                                                                                                   \
+    vec_st_l8(pkfnc(a.val, a.val), ptr);                                                            \
+}                                                                                                   \
+template<int n>                                                                                     \
+inline _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b)                                     \
+{                                                                                                   \
+    const __vector _Tpvn vn = vec_splats((_Tpvn)n);                                                 \
+    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1)));                         \
+    return _Tpvec(pkfnc(sfnc(addfnc(a.val, delta), vn), sfnc(addfnc(b.val, delta), vn)));           \
+}                                                                                                   \
+template<int n>                                                                                     \
+inline void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a)                                       \
+{                                                                                                   \
+    const __vector _Tpvn vn = vec_splats((_Tpvn)n);                                                 \
+    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1)));                         \
+    vec_st_l8(pkfnc(sfnc(addfnc(a.val, delta), vn), delta), ptr);                                   \
+}
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_uint16x8, unsigned short, unsigned short,
+                         vec_sr, vec_packs, vec_adds, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int8x16, schar, v_int16x8, unsigned short, short,
+                         vec_sra, vec_packs, vec_adds, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_uint32x4, unsigned int, unsigned int,
+                         vec_sr, vec_packs, vec_add, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int,
+                         vec_sra, vec_packs, vec_add, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long,
+                         vec_sr, vec_packs, vec_add, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long,
+                         vec_sra, vec_packs, vec_add, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short,
+                         vec_sra, vec_packsu, vec_adds, pack_u)
+OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
+                         vec_sra, vec_packsu, vec_add, pack_u)
+OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
+                         vec_sra, vec_packsu, vec_add, pack_u)
+
+/* Recombine */
+template <typename _Tpvec>
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
+{
+    b0.val = vec_mergeh(a0.val, a1.val);
+    b1.val = vec_mergel(a0.val, a1.val);
+}
+
+template <typename _Tpvec>
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b)
+{ return _Tpvec(vec_mergesql(a.val, b.val)); }
+
+template <typename _Tpvec>
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)
+{ return _Tpvec(vec_mergesqh(a.val, b.val)); }
+
+template <typename _Tpvec>
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
+{
+    c.val = vec_mergesqh(a.val, b.val);
+    d.val = vec_mergesql(a.val, b.val);
+}
+
+/* Extract */
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{
+    const int w = sizeof(typename _Tpvec::lane_type);
+    const int n = _Tpvec::nlanes;
+    const unsigned int sf = ((w * n) - (s * w));
+    if (s == 0)
+        return _Tpvec(a.val);
+    else if (sf > 15)
+        return _Tpvec();
+    // the bitwise AND is just to make xlc happy
+    return _Tpvec(vec_sld(b.val, a.val, sf & 15));
+}
+
+#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec)             \
+template<int s>                                           \
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
+{                                                         \
+    switch(s) {                                           \
+    case 0: return _Tpvec(a.val);                         \
+    case 2: return _Tpvec(b.val);                         \
+    case 1: return _Tpvec(vec_sldw(b.val, a.val, 2));     \
+    default: return _Tpvec();                             \
+    }                                                     \
+}
+OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2)
+OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2)
+
+
+////////// Arithmetic, bitwise and comparison operations /////////
+
+/* Element-wise binary and unary operations */
+/** Arithmetics **/
+#define OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin)       \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(intrin(a.val, b.val)); }                         \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b)   \
+{ a.val = intrin(a.val, b.val); return a; }
+
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint8x16, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint8x16, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16,  vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint16x8, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int16x8, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float32x4, vec_div)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float64x2, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float64x2, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float64x2, vec_div)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub)
+
+inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, v_int32x4& c, v_int32x4& d)
+{
+    c.val = vec_mul(vec_unpackh(a.val), vec_unpackh(b.val));
+    d.val = vec_mul(vec_unpackl(a.val), vec_unpackl(b.val));
+}
+inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, v_uint32x4& c, v_uint32x4& d)
+{
+    c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
+    d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
+}
+inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c, v_uint64x2& d)
+{
+    c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
+    d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
+}
+
+/** Non-saturating arithmetics **/
+#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin)    \
+template<typename _Tpvec>                             \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b)  \
+{ return _Tpvec(intrin(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub)
+
+/** Bitwise shifts **/
+#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpuvec, splfunc)   \
+inline _Tpuvec operator << (const _Tpuvec& a, int imm)   \
+{ return _Tpuvec(vec_sl(a.val, splfunc(imm))); }         \
+inline _Tpuvec operator >> (const _Tpuvec& a, int imm)   \
+{ return _Tpuvec(vec_sr(a.val, splfunc(imm))); }         \
+template<int imm> inline _Tpuvec v_shl(const _Tpuvec& a) \
+{ return _Tpuvec(vec_sl(a.val, splfunc(imm))); }         \
+template<int imm> inline _Tpuvec v_shr(const _Tpuvec& a) \
+{ return _Tpuvec(vec_sr(a.val, splfunc(imm))); }
+
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint8x16, vec_uchar16_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int8x16, vec_uchar16_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint16x8, vec_ushort8_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int16x8, vec_ushort8_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint32x4, vec_uint4_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int32x4, vec_uint4_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint64x2, vec_udword2_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int64x2, vec_udword2_sp)
+
+/** Bitwise logic **/
+#define OPENCV_HAL_IMPL_VSX_LOGIC_OP(_Tpvec)    \
+OPENCV_HAL_IMPL_VSX_BIN_OP(&, _Tpvec, vec_and)  \
+OPENCV_HAL_IMPL_VSX_BIN_OP(|, _Tpvec, vec_or)   \
+OPENCV_HAL_IMPL_VSX_BIN_OP(^, _Tpvec, vec_xor)  \
+inline _Tpvec operator ~ (const _Tpvec& a)      \
+{ return _Tpvec(vec_not(a.val)); }
+
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint8x16)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int8x16)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint16x8)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int16x8)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint64x2)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int64x2)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float64x2)
+
+/** Bitwise select **/
+#define OPENCV_HAL_IMPL_VSX_SELECT(_Tpvec, cast)                             \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(vec_sel(b.val, a.val, cast(mask.val))); }
+
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint8x16, vec_bchar16_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int8x16, vec_bchar16_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint16x8, vec_bshort8_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int16x8, vec_bshort8_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_float32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c)
+
+/** Comparison **/
+#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec)                 \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmpeq(a.val, b.val)); }                    \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmpne(a.val, b.val)); }                    \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b)    \
+{ return _Tpvec(vec_cmplt(a.val, b.val)); }                    \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b)    \
+{ return _Tpvec(vec_cmpgt(a.val, b.val)); }                    \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmple(a.val, b.val)); }                    \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmpge(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint8x16)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int8x16)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint16x8)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int16x8)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint32x4)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int32x4)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float32x4)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2)
+
+/** min/max **/
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max)
+
+////////// Reduce and mask /////////
+
+/** Reduce **/
+inline short v_reduce_sum(const v_int16x8& a)
+{
+    const vec_int4 zero = vec_int4_z;
+    return saturate_cast<short>(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3));
+}
+inline ushort v_reduce_sum(const v_uint16x8& a)
+{
+    const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8))));
+    return saturate_cast<ushort>(vec_extract(vec_sums(v4, vec_int4_z), 3));
+}
+
+#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \
+inline scalartype v_reduce_##suffix(const _Tpvec& a)                               \
+{                                                                                  \
+    const _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8));                      \
+    return vec_extract(func(rs, vec_sld(rs, rs, 4)), 0);                           \
+}
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, sum, vec_add)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, sum, vec_add)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
+
+#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \
+inline scalartype v_reduce_##suffix(const _Tpvec& a)                               \
+{                                                                                  \
+    _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8));                            \
+    rs = func(rs, vec_sld(rs, rs, 4));                                             \
+    return vec_extract(func(rs, vec_sld(rs, rs, 2)), 0);                           \
+}
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min)
+
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    vec_float4 ac = vec_add(vec_mergel(a.val, c.val), vec_mergeh(a.val, c.val));
+    ac = vec_add(ac, vec_sld(ac, ac, 8));
+
+    vec_float4 bd = vec_add(vec_mergel(b.val, d.val), vec_mergeh(b.val, d.val));
+    bd = vec_add(bd, vec_sld(bd, bd, 8));
+    return v_float32x4(vec_mergeh(ac, bd));
+}
+
+/** Popcount **/
+#define OPENCV_HAL_IMPL_VSX_POPCOUNT_8(_Tpvec)                           \
+inline v_uint32x4 v_popcount(const _Tpvec& a)                            \
+{                                                                        \
+    vec_uchar16 v16 = vec_popcntu(a.val);                                \
+    vec_ushort8 v8  = vec_add(vec_unpacklu(v16), vec_unpackhu(v16));     \
+    return v_uint32x4(vec_add(vec_unpacklu(v8), vec_unpackhu(v8)));      \
+}
+OPENCV_HAL_IMPL_VSX_POPCOUNT_8(v_int8x16)
+OPENCV_HAL_IMPL_VSX_POPCOUNT_8(v_uint8x16)
+
+#define OPENCV_HAL_IMPL_VSX_POPCOUNT_16(_Tpvec)                          \
+inline v_uint32x4 v_popcount(const _Tpvec& a)                            \
+{                                                                        \
+    vec_ushort8 v8  = vec_popcntu(a.val);                                \
+    return v_uint32x4(vec_add(vec_unpacklu(v8), vec_unpackhu(v8)));      \
+}
+OPENCV_HAL_IMPL_VSX_POPCOUNT_16(v_int16x8)
+OPENCV_HAL_IMPL_VSX_POPCOUNT_16(v_uint16x8)
+
+#define OPENCV_HAL_IMPL_VSX_POPCOUNT_32(_Tpvec)                          \
+inline v_uint32x4 v_popcount(const _Tpvec& a)                            \
+{ return v_uint32x4(vec_popcntu(a.val)); }
+
+OPENCV_HAL_IMPL_VSX_POPCOUNT_32(v_int32x4)
+OPENCV_HAL_IMPL_VSX_POPCOUNT_32(v_uint32x4)
+
+/** Mask **/
+inline int v_signmask(const v_uint8x16& a)
+{
+    vec_uchar16 sv  = vec_sr(a.val, vec_uchar16_sp(7));
+    static const vec_uchar16 slm = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
+    sv = vec_sl(sv, slm);
+    vec_uint4 sv4 = vec_sum4s(sv, vec_uint4_z);
+    static const vec_uint4 slm4 = {0, 0, 8, 8};
+    sv4 = vec_sl(sv4, slm4);
+    return vec_extract(vec_sums((vec_int4) sv4, vec_int4_z), 3);
+}
+inline int v_signmask(const v_int8x16& a)
+{ return v_signmask(v_reinterpret_as_u8(a)); }
+
+inline int v_signmask(const v_int16x8& a)
+{
+    static const vec_ushort8 slm = {0, 1, 2, 3, 4, 5, 6, 7};
+    vec_short8 sv = vec_sr(a.val, vec_ushort8_sp(15));
+    sv = vec_sl(sv, slm);
+    vec_int4 svi = vec_int4_z;
+    svi = vec_sums(vec_sum4s(sv, svi), svi);
+    return vec_extract(svi, 3);
+}
+inline int v_signmask(const v_uint16x8& a)
+{ return v_signmask(v_reinterpret_as_s16(a)); }
+
+inline int v_signmask(const v_int32x4& a)
+{
+    static const vec_uint4 slm = {0, 1, 2, 3};
+    vec_int4 sv = vec_sr(a.val, vec_uint4_sp(31));
+    sv = vec_sl(sv, slm);
+    sv = vec_sums(sv, vec_int4_z);
+    return vec_extract(sv, 3);
+}
+inline int v_signmask(const v_uint32x4& a)
+{ return v_signmask(v_reinterpret_as_s32(a)); }
+inline int v_signmask(const v_float32x4& a)
+{ return v_signmask(v_reinterpret_as_s32(a)); }
+
+inline int v_signmask(const v_int64x2& a)
+{
+    const vec_dword2 sv = vec_sr(a.val, vec_udword2_sp(63));
+    return (int)vec_extract(sv, 0) | (int)vec_extract(sv, 1) << 1;
+}
+inline int v_signmask(const v_uint64x2& a)
+{ return v_signmask(v_reinterpret_as_s64(a)); }
+inline int v_signmask(const v_float64x2& a)
+{ return v_signmask(v_reinterpret_as_s64(a)); }
+
+
+template<typename _Tpvec>
+inline bool v_check_all(const _Tpvec& a)
+{ return vec_all_lt(a.val, _Tpvec().val);}
+inline bool v_check_all(const v_uint8x16 &a)
+{ return v_check_all(v_reinterpret_as_s8(a)); }
+inline bool v_check_all(const v_uint16x8 &a)
+{ return v_check_all(v_reinterpret_as_s16(a)); }
+inline bool v_check_all(const v_uint32x4 &a)
+{ return v_check_all(v_reinterpret_as_s32(a)); }
+
+template<typename _Tpvec>
+inline bool v_check_any(const _Tpvec& a)
+{ return vec_any_lt(a.val, _Tpvec().val);}
+inline bool v_check_any(const v_uint8x16 &a)
+{ return v_check_any(v_reinterpret_as_s8(a)); }
+inline bool v_check_any(const v_uint16x8 &a)
+{ return v_check_any(v_reinterpret_as_s16(a)); }
+inline bool v_check_any(const v_uint32x4 &a)
+{ return v_check_any(v_reinterpret_as_s32(a)); }
+
+////////// Other math /////////
+
+/** Some frequent operations **/
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{ return v_float32x4(vec_sqrt(x.val)); }
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{ return v_float64x2(vec_sqrt(x.val)); }
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{ return v_float32x4(vec_rsqrt(x.val)); }
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{ return v_float64x2(vec_rsqrt(x.val)); }
+
+#define OPENCV_HAL_IMPL_VSX_MULADD(_Tpvec)                                  \
+inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b)                 \
+{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \
+inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b)             \
+{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); }           \
+inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)   \
+{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }
+
+OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
+OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
+
+// TODO: exp, log, sin, cos
+
+/** Absolute values **/
+inline v_uint8x16 v_abs(const v_int8x16& x)
+{ return v_uint8x16(vec_uchar16_c(vec_abs(x.val))); }
+
+inline v_uint16x8 v_abs(const v_int16x8& x)
+{ return v_uint16x8(vec_ushort8_c(vec_abs(x.val))); }
+
+inline v_uint32x4 v_abs(const v_int32x4& x)
+{ return v_uint32x4(vec_uint4_c(vec_abs(x.val))); }
+
+inline v_float32x4 v_abs(const v_float32x4& x)
+{ return v_float32x4(vec_abs(x.val)); }
+
+inline v_float64x2 v_abs(const v_float64x2& x)
+{ return v_float64x2(vec_abs(x.val)); }
+
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)
+
+#define OPENCV_HAL_IMPL_VSX_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin)  \
+inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b)                       \
+{ return _Tpvec2(cast(intrin(a.val, b.val))); }
+
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int8x16, v_uint8x16, vec_uchar16_c, v_absdiff, vec_absd)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int16x8, v_uint16x8, vec_ushort8_c, v_absdiff, vec_absd)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int32x4, v_uint32x4, vec_uint4_c, v_absdiff, vec_absd)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int64x2, v_uint64x2, vec_udword2_c, v_absdiff, vec_absd)
+
+////////// Conversions /////////
+
+/** Rounding **/
+inline v_int32x4 v_round(const v_float32x4& a)
+{ return v_int32x4(vec_cts(vec_round(a.val), 0)); }
+
+inline v_int32x4 v_round(const v_float64x2& a)
+{
+    static const vec_uchar16 perm = {16, 17, 18, 19, 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 0};
+    return v_int32x4(vec_perm(vec_int4_z, vec_ctsw(vec_round(a.val)), perm));
+}
+
+inline v_int32x4 v_floor(const v_float32x4& a)
+{ return v_int32x4(vec_cts(vec_floor(a.val), 0)); }
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+    static const vec_uchar16 perm = {16, 17, 18, 19, 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 0};
+    return v_int32x4(vec_perm(vec_int4_z, vec_ctsw(vec_floor(a.val)), perm));
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{ return v_int32x4(vec_cts(vec_ceil(a.val), 0)); }
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+    static const vec_uchar16 perm = {16, 17, 18, 19, 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 0};
+    return v_int32x4(vec_perm(vec_int4_z, vec_ctsw(vec_ceil(a.val)), perm));
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{ return v_int32x4(vec_cts(a.val, 0)); }
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{
+    static const vec_uchar16 perm = {16, 17, 18, 19, 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 0};
+    return v_int32x4(vec_perm(vec_int4_z, vec_ctsw(a.val), perm));
+}
+
+/** To float **/
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{ return v_float32x4(vec_ctf(a.val, 0)); }
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{
+    static const vec_uchar16 perm = {16, 17, 18, 19, 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 0};
+    return v_float32x4(vec_perm(vec_float4_z, vec_cvf(a.val), perm));
+}
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{
+    return v_float64x2(vec_ctd(vec_mergeh(a.val, a.val), 0));
+}
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{
+    return v_float64x2(vec_ctd(vec_mergel(a.val, a.val), 0));
+}
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{
+    return v_float64x2(vec_cvf(vec_mergeh(a.val, a.val)));
+}
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{
+    return v_float64x2(vec_cvf(vec_mergel(a.val, a.val)));
+}
+
+/** Reinterpret **/
+/** defined above, together with the load and store operations **/
+
+////////// Matrix operations /////////
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    const vec_float4 v0 = vec_splat(v.val, 0);
+    const vec_float4 v1 = vec_splat(v.val, 1);
+    const vec_float4 v2 = vec_splat(v.val, 2);
+    const vec_float4 v3 = vec_splat(v.val, 3);
+    return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, vec_mul(v3, m3.val)))));
+}
+
+#define OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(_Tpvec, _Tpvec2)                        \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1,                   \
+                           const _Tpvec& a2, const _Tpvec& a3,                   \
+                           _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3)       \
+{                                                                                \
+    _Tpvec2 a02 = vec_mergeh(a0.val, a2.val);                                    \
+    _Tpvec2 a13 = vec_mergeh(a1.val, a3.val);                                    \
+    b0.val = vec_mergeh(a02, a13);                                               \
+    b1.val = vec_mergel(a02, a13);                                               \
+    a02 = vec_mergel(a0.val, a2.val);                                            \
+    a13 = vec_mergel(a1.val, a3.val);                                            \
+    b2.val  = vec_mergeh(a02, a13);                                              \
+    b3.val  = vec_mergel(a02, a13);                                              \
+}
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4)
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)
+
+//! @name Check SIMD support
+//! @{
+//! @brief Check CPU capability of SIMD operation
+static inline bool hasSIMD128()
+{
+    return (CV_CPU_HAS_SUPPORT_VSX) ? true : false;
+}
+
+//! @}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif // OPENCV_HAL_VSX_HPP
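With this header in place, existing universal-intrinsics code maps onto VSX unchanged; a minimal sketch (dot4 is an invented helper name):

    #include <opencv2/core/hal/intrin.hpp>

    // dot product of two 4-float chunks
    inline float dot4(const float* a, const float* b)
    {
        cv::v_float32x4 va = cv::v_load(a);
        cv::v_float32x4 vb = cv::v_load(b);
        return cv::v_reduce_sum(va * vb); // vec_mul plus a horizontal vec_add on VSX
    }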
index 2e927c4..e973593 100644 (file)
@@ -582,7 +582,7 @@ protected:
 An example demonstrating the serial out capabilities of cv::Mat
 */
 
- /** @brief n-dimensional dense array class
+ /** @brief n-dimensional dense array class \anchor CVMat_Details
 
 The class Mat represents an n-dimensional dense numerical single-channel or multi-channel array. It
 can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel
index 12814e1..c36252a 100644 (file)
@@ -333,8 +333,12 @@ public:
     void* ptr() const;
     static Queue& getDefault();
 
+    /// @brief Returns an OpenCL command queue with profiling mode enabled
+    const Queue& getProfilingQueue() const;
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return p; }
 protected:
-    struct Impl;
     Impl* p;
 };
 
@@ -569,6 +573,12 @@ public:
              size_t localsize[], bool sync, const Queue& q=Queue());
     bool runTask(bool sync, const Queue& q=Queue());
 
+    /** @brief Similar to a synchronized run() call, but returns the kernel execution time.
+     * A separate OpenCL command queue may be used (created with CL_QUEUE_PROFILING_ENABLE)
+     * @return Execution time in nanoseconds, or a negative number on error
+     */
+    int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue());
+
     size_t workGroupSize() const;
     size_t preferedWorkGroupSizeMultiple() const;
     bool compileWorkGroupSize(size_t wsz[]) const;
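A hedged sketch of the new profiling call (the kernel name and the `program` object are assumed to exist):

    cv::ocl::Kernel k("my_kernel", program);
    size_t globalsize[] = { 1024 }, localsize[] = { 64 };
    int64 ns = k.runProfiling(1, globalsize, localsize);
    if (ns >= 0)
        printf("kernel took %.3f ms\n", ns * 1e-6);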
@@ -603,8 +613,10 @@ public:
     String getPrefix() const;
     static String getPrefix(const String& buildflags);
 
-protected:
+
     struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+protected:
     Impl* p;
 };
 
@@ -625,8 +637,9 @@ public:
     const String& source() const;
     hash_t hash() const; // deprecated
 
-protected:
     struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+protected:
     Impl* p;
 };
 
@@ -655,6 +668,7 @@ CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
 CV_EXPORTS const char* typeToStr(int t);
 CV_EXPORTS const char* memopTypeToStr(int t);
 CV_EXPORTS const char* vecopTypeToStr(int t);
+CV_EXPORTS const char* getOpenCLErrorString(int errorCode);
 CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
 CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
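A minimal sketch of the new error-string helper (the status value is illustrative):

    int status = -5; // e.g. CL_OUT_OF_RESOURCES returned by an OpenCL call
    printf("OpenCL error: %s (%d)\n", cv::ocl::getOpenCLErrorString(status), status);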
 
@@ -721,6 +735,21 @@ protected:
     Impl* p;
 };
 
+class CV_EXPORTS Timer
+{
+public:
+    Timer(const Queue& q);
+    ~Timer();
+    void start();
+    void stop();
+    float milliSeconds();
+    float microSeconds();
+    float seconds();
+
+protected:
+    struct Impl;
+    Impl* p;
+};
 
 CV_EXPORTS MatAllocator* getOpenCLAllocator();
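A hedged usage sketch of the new Timer (assumes `q` is an already-created cv::ocl::Queue):

    cv::ocl::Timer timer(q);
    timer.start();
    // ... enqueue OpenCL work on q ...
    timer.stop();
    printf("elapsed: %.3f ms\n", timer.milliSeconds());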
 
diff --git a/modules/core/include/opencv2/core/utils/configuration.private.hpp b/modules/core/include/opencv2/core/utils/configuration.private.hpp
new file mode 100644 (file)
index 0000000..fa1b045
--- /dev/null
@@ -0,0 +1,16 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CONFIGURATION_PRIVATE_HPP
+#define OPENCV_CONFIGURATION_PRIVATE_HPP
+
+namespace cv { namespace utils {
+
+CV_EXPORTS bool getConfigurationParameterBool(const char* name, bool defaultValue);
+CV_EXPORTS size_t getConfigurationParameterSizeT(const char* name, size_t defaultValue);
+CV_EXPORTS cv::String getConfigurationParameterString(const char* name, const char* defaultValue);
+
+}} // namespace
+
+#endif // OPENCV_CONFIGURATION_PRIVATE_HPP
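A minimal usage sketch (the environment-variable name is invented for illustration):

    #include "opencv2/core/utils/configuration.private.hpp"

    static const bool useMyFeature =
        cv::utils::getConfigurationParameterBool("OPENCV_MY_FEATURE_ENABLE", true);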
index d7e73de..c7b31ea 100644 (file)
@@ -16,6 +16,7 @@
 //
 //! @{
 
+namespace cv {
 namespace utils {
 namespace logging {
 
@@ -77,7 +78,7 @@ enum LogLevel {
 #endif
 
 
-}} // namespace
+}}} // namespace
 
 //! @}
 
diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp
new file mode 100644 (file)
index 0000000..3ce190b
--- /dev/null
@@ -0,0 +1,945 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_VSX_UTILS_HPP
+#define OPENCV_HAL_VSX_UTILS_HPP
+
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils_vsx
+//! @{
+#if CV_VSX
+
+#define FORCE_INLINE(tp) extern inline tp __attribute__((always_inline))
+
+#define VSX_REDIRECT_1RG(rt, rg, fnm, fn2)   \
+FORCE_INLINE(rt) fnm(const rg& a) { return fn2(a); }
+
+#define VSX_REDIRECT_2RG(rt, rg, fnm, fn2)   \
+FORCE_INLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); }
+
+#define VSX_IMPL_PERM(rt, fnm, ...)            \
+FORCE_INLINE(rt) fnm(const rt& a, const rt& b) \
+    { static const vec_uchar16 perm = {__VA_ARGS__}; return vec_perm(a, b, perm); }
+
+#define __VSX_S16__(c, v) (c){v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}
+#define __VSX_S8__(c, v)  (c){v, v, v, v, v, v, v, v}
+#define __VSX_S4__(c, v)  (c){v, v, v, v}
+#define __VSX_S2__(c, v)  (c){v, v}
+
+typedef __vector unsigned char vec_uchar16;
+#define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__}
+#define vec_uchar16_sp(c)    (__VSX_S16__(vec_uchar16, c))
+#define vec_uchar16_c(v)     ((vec_uchar16)(v))
+#define vec_uchar16_mx       vec_uchar16_sp(0xFF)
+#define vec_uchar16_mn       vec_uchar16_sp(0)
+#define vec_uchar16_z        vec_uchar16_mn
+
+typedef __vector signed char vec_char16;
+#define vec_char16_set(...) (vec_char16){__VA_ARGS__}
+#define vec_char16_sp(c)    (__VSX_S16__(vec_char16, c))
+#define vec_char16_c(v)     ((vec_char16)(v))
+#define vec_char16_mx       vec_char16_sp(0x7F)
+#define vec_char16_mn       vec_char16_sp(-0x7F-1)
+#define vec_char16_z        vec_char16_sp(0)
+
+typedef __vector unsigned short vec_ushort8;
+#define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__}
+#define vec_ushort8_sp(c)    (__VSX_S8__(vec_ushort8, c))
+#define vec_ushort8_c(v)     ((vec_ushort8)(v))
+#define vec_ushort8_mx       vec_ushort8_sp(0xFFFF)
+#define vec_ushort8_mn       vec_ushort8_sp(0)
+#define vec_ushort8_z        vec_ushort8_mn
+
+typedef __vector signed short vec_short8;
+#define vec_short8_set(...) (vec_short8){__VA_ARGS__}
+#define vec_short8_sp(c)    (__VSX_S8__(vec_short8, c))
+#define vec_short8_c(v)     ((vec_short8)(v))
+#define vec_short8_mx       vec_short8_sp(0x7FFF)
+#define vec_short8_mn       vec_short8_sp(-0x7FFF-1)
+#define vec_short8_z        vec_short8_sp(0)
+
+typedef __vector unsigned int vec_uint4;
+#define vec_uint4_set(...) (vec_uint4){__VA_ARGS__}
+#define vec_uint4_sp(c)    (__VSX_S4__(vec_uint4, c))
+#define vec_uint4_c(v)     ((vec_uint4)(v))
+#define vec_uint4_mx       vec_uint4_sp(0xFFFFFFFFU)
+#define vec_uint4_mn       vec_uint4_sp(0)
+#define vec_uint4_z        vec_uint4_mn
+
+typedef __vector signed int vec_int4;
+#define vec_int4_set(...)  (vec_int4){__VA_ARGS__}
+#define vec_int4_sp(c)     (__VSX_S4__(vec_int4, c))
+#define vec_int4_c(v)      ((vec_int4)(v))
+#define vec_int4_mx        vec_int4_sp(0x7FFFFFFF)
+#define vec_int4_mn        vec_int4_sp(-0x7FFFFFFF-1)
+#define vec_int4_z         vec_int4_sp(0)
+
+typedef __vector float vec_float4;
+#define vec_float4_set(...)  (vec_float4){__VA_ARGS__}
+#define vec_float4_sp(c)     (__VSX_S4__(vec_float4, c))
+#define vec_float4_c(v)      ((vec_float4)(v))
+#define vec_float4_mx        vec_float4_sp(3.40282347E+38F)
+#define vec_float4_mn        vec_float4_sp(1.17549435E-38F)
+#define vec_float4_z         vec_float4_sp(0)
+
+typedef __vector unsigned long long vec_udword2;
+#define vec_udword2_set(...) (vec_udword2){__VA_ARGS__}
+#define vec_udword2_sp(c)    (__VSX_S2__(vec_udword2, c))
+#define vec_udword2_c(v)     ((vec_udword2)(v))
+#define vec_udword2_mx       vec_udword2_sp(18446744073709551615ULL)
+#define vec_udword2_mn       vec_udword2_sp(0)
+#define vec_udword2_z        vec_udword2_mn
+
+typedef __vector signed long long vec_dword2;
+#define vec_dword2_set(...) (vec_dword2){__VA_ARGS__}
+#define vec_dword2_sp(c)    (__VSX_S2__(vec_dword2, c))
+#define vec_dword2_c(v)     ((vec_dword2)(v))
+#define vec_dword2_mx       vec_dword2_sp(9223372036854775807LL)
+#define vec_dword2_mn       vec_dword2_sp(-9223372036854775807LL-1)
+#define vec_dword2_z        vec_dword2_sp(0)
+
+typedef  __vector double vec_double2;
+#define vec_double2_set(...) (vec_double2){__VA_ARGS__}
+#define vec_double2_c(v)     ((vec_double2)(v))
+#define vec_double2_sp(c)    (__VSX_S2__(vec_double2, c))
+#define vec_double2_mx       vec_double2_sp(1.7976931348623157E+308)
+#define vec_double2_mn       vec_double2_sp(2.2250738585072014E-308)
+#define vec_double2_z        vec_double2_sp(0)
+
+#define vec_bchar16              __vector __bool char
+#define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__}
+#define vec_bchar16_c(v)     ((vec_bchar16)(v))
+#define vec_bchar16_f        (__VSX_S16__(vec_bchar16, 0))
+#define vec_bchar16_t        (__VSX_S16__(vec_bchar16, 1))
+
+#define vec_bshort8              __vector __bool short
+#define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__}
+#define vec_bshort8_c(v)     ((vec_bshort8)(v))
+#define vec_bshort8_f        (__VSX_S8__(vec_bshort8, 0))
+#define vec_bshort8_t        (__VSX_S8__(vec_bshort8, 1))
+
+#define vec_bint4             __vector __bool int
+#define vec_bint4_set(...)   (vec_bint4){__VA_ARGS__}
+#define vec_bint4_c(v)       ((vec_bint4)(v))
+#define vec_bint4_f          (__VSX_S4__(vec_bint4, 0))
+#define vec_bint4_t          (__VSX_S4__(vec_bint4, 1))
+
+#define vec_bdword2               __vector __bool long long
+#define vec_bdword2_set(...)  (vec_bdword2){__VA_ARGS__}
+#define vec_bdword2_c(v)      ((vec_bdword2)(v))
+#define vec_bdword2_f         (__VSX_S2__(vec_bdword2, 0))
+#define vec_bdword2_t         (__VSX_S2__(vec_bdword2, 1))
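
Taken together, these helpers give every lane type a uniform spelling for literal construction (_set), splatting (_sp) and bit-level casts (_c). A minimal usage sketch, assuming a POWER little-endian target built with CV_VSX enabled and the header path this patch adds (the function name is illustrative):

    #include "opencv2/core/vsx_utils.hpp"

    vec_int4 splat_and_add()
    {
        vec_int4 a = vec_int4_set(1, -2, 3, -4); // per-lane initializer
        vec_int4 b = vec_int4_sp(10);            // splat: {10, 10, 10, 10}
        vec_uint4 bits = vec_uint4_c(a);         // bit-level cast, no value conversion
        (void)bits;
        return a + b;                            // GCC/clang vector operators: {11, 8, 13, 6}
    }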
+
+/*
+ * GCC VSX compatibility
+**/
+#if defined(__GNUG__) && !defined(__IBMCPP__) && !defined(__clang__)
+
+// inline asm helper
+#define VSX_IMPL_1RG(rt, rto, rg, rgo, opc, fnm) \
+FORCE_INLINE(rt) fnm(const rg& a)                \
+    { rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "="#rto (rs) : #rgo (a)); return rs; }
+
+#define VSX_IMPL_1VRG(rt, rg, opc, fnm) \
+FORCE_INLINE(rt) fnm(const rg& a)       \
+    { rt rs; __asm__ __volatile__(#opc" %0,%1" : "=v" (rs) : "v" (a)); return rs; }
+
+#define VSX_IMPL_2VRG_F(rt, rg, fopc, fnm)     \
+FORCE_INLINE(rt) fnm(const rg& a, const rg& b) \
+    { rt rs; __asm__ __volatile__(fopc : "=v" (rs) : "v" (a), "v" (b)); return rs; }
+
+#define VSX_IMPL_2VRG(rt, rg, opc, fnm) VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%1,%2", fnm)
+
+#if __GNUG__ < 7
+/* up to GCC 6, vec_mul only supports single/double precision and long long */
+#   ifdef vec_mul
+#       undef vec_mul
+#   endif
+/*
+ * there is no direct instruction for 16-bit multiplication in ISA 2.07;
+ * XLC implements it using "multiply even", "multiply odd" and "permute", and we do the same
+ * todo: do we need to support 8-bit as well?
+**/
+#   define VSX_IMPL_MULH(Tvec, Tcast)                                               \
+    FORCE_INLINE(Tvec) vec_mul(const Tvec& a, const Tvec& b)                        \
+    {                                                                               \
+        static const vec_uchar16 even_perm = {0, 1, 16, 17, 4, 5, 20, 21,           \
+                                              8, 9, 24, 25, 12, 13, 28, 29};        \
+        return vec_perm(Tcast(vec_mule(a, b)), Tcast(vec_mulo(a, b)), even_perm);   \
+    }
+    VSX_IMPL_MULH(vec_short8, vec_short8_c)
+    VSX_IMPL_MULH(vec_ushort8, vec_ushort8_c)
+    /* per the ISA, vmuluwm can be used for both unsigned and signed word integers */
+    VSX_IMPL_2VRG(vec_int4, vec_int4, vmuluwm, vec_mul)
+    VSX_IMPL_2VRG(vec_uint4, vec_uint4, vmuluwm, vec_mul)
+    /* redirect to the GCC builtin vec_mul, which already supports single/double precision and long long */
+    VSX_REDIRECT_2RG(vec_float4, vec_float4, vec_mul, __builtin_vec_mul)
+    VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mul, __builtin_vec_mul)
+    VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mul, __builtin_vec_mul)
+    VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mul, __builtin_vec_mul)
+#endif // __GNUG__ < 7
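
The even/odd trick above works because vec_mule and vec_mulo produce the full 32-bit products of the even- and odd-indexed 16-bit lanes, and the permute gathers the low halves back into lane order. Per lane this matches a plain truncating short multiply; a scalar sketch of a single lane (illustrative only):

    // Low 16 bits of the full product -- the same wrap-around behavior
    // the vectorized vec_mul above produces per 16-bit lane.
    short mul16_lane(short a, short b)
    {
        return (short)((int)a * (int)b);
    }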
+
+#if __GNUG__ < 6
+/*
+ * Instruction "compare greater than or equal" in ISA 2.07 only supports single
+ * and double precision.
+ * In XLC and new versions of GCC implement integers by using instruction "greater than" and NOR.
+**/
+#   ifdef vec_cmpge
+#       undef vec_cmpge
+#   endif
+#   ifdef vec_cmple
+#       undef vec_cmple
+#   endif
+#   define vec_cmple(a, b) vec_cmpge(b, a)
+#   define VSX_IMPL_CMPGE(rt, rg, opc, fnm) \
+    VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%2,%1\n\t xxlnor %x0,%x0,%x0", fnm)
+
+    VSX_IMPL_CMPGE(vec_bchar16, vec_char16, vcmpgtsb, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bchar16, vec_uchar16, vcmpgtub, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bshort8, vec_short8, vcmpgtsh, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bshort8, vec_ushort8, vcmpgtuh, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bint4, vec_int4, vcmpgtsw, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bint4, vec_uint4, vcmpgtuw, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bdword2, vec_dword2, vcmpgtsd, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bdword2, vec_udword2, vcmpgtud, vec_cmpge)
+    /* redirect to the GCC builtin cmpge, which already supports single/double precision */
+    VSX_REDIRECT_2RG(vec_bint4, vec_float4, vec_cmpge, __builtin_vec_cmpge)
+    VSX_REDIRECT_2RG(vec_bdword2, vec_double2, vec_cmpge, __builtin_vec_cmpge)
+
+// up to GCC 5, vec_nor doesn't support bool long long
+#   undef vec_nor
+template<typename T>
+VSX_REDIRECT_2RG(T, T, vec_nor, __builtin_vec_nor)
+
+FORCE_INLINE(vec_bdword2) vec_nor(const vec_bdword2& a, const vec_bdword2& b)
+{ return vec_bdword2_c(__builtin_vec_nor(vec_dword2_c(a), vec_dword2_c(b))); }
+#endif // __GNUG__ < 6
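
The cmpge emulation leans on the identity (a >= b) == !(b > a): the asm template issues the "greater than" compare with swapped operands, then inverts the resulting mask with xxlnor. The same identity on a single lane (illustrative sketch):

    // All-ones mask when a >= b, all-zeros otherwise,
    // mirroring one lane of the emulated vec_cmpge.
    unsigned char cmpge_lane(signed char a, signed char b)
    {
        unsigned char gt_mask = (b > a) ? 0xFF : 0x00; // vcmpgt with swapped operands
        return (unsigned char)~gt_mask;                // xxlnor inverts the mask
    }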
+
+// vector population count
+#ifndef vec_popcnt
+    VSX_IMPL_1VRG(vec_uchar16, vec_uchar16, vpopcntb, vec_popcnt)
+    VSX_IMPL_1VRG(vec_uchar16, vec_char16, vpopcntb, vec_popcnt)
+    VSX_IMPL_1VRG(vec_ushort8, vec_ushort8, vpopcnth, vec_popcnt)
+    VSX_IMPL_1VRG(vec_ushort8, vec_short8, vpopcnth, vec_popcnt)
+    VSX_IMPL_1VRG(vec_uint4, vec_uint4, vpopcntw, vec_popcnt)
+    VSX_IMPL_1VRG(vec_uint4, vec_int4, vpopcntw, vec_popcnt)
+    VSX_IMPL_1VRG(vec_udword2, vec_udword2, vpopcntd, vec_popcnt)
+    VSX_IMPL_1VRG(vec_udword2, vec_dword2, vpopcntd, vec_popcnt)
+#endif // vec_popcnt
+
+#if __GNUG__ < 5
+// vec_xxpermdi in GCC 4 lacks little-endian support, just like clang
+#   define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ ((c & 1) << 1 | c >> 1)))
+// vec_packs doesn't support doublewords in GCC 4
+# undef vec_packs
+VSX_REDIRECT_2RG(vec_char16, vec_short8, vec_packs, __builtin_vec_packs)
+VSX_REDIRECT_2RG(vec_uchar16, vec_ushort8, vec_packs, __builtin_vec_packs)
+VSX_REDIRECT_2RG(vec_short8, vec_int4, vec_packs, __builtin_vec_packs)
+VSX_REDIRECT_2RG(vec_ushort8, vec_uint4, vec_packs, __builtin_vec_packs)
+VSX_IMPL_2VRG_F(vec_int4, vec_dword2, "vpksdss %0,%2,%1", vec_packs)
+VSX_IMPL_2VRG_F(vec_uint4, vec_udword2, "vpkudus %0,%2,%1", vec_packs)
+#else
+#   define vec_permi vec_xxpermdi
+#endif
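
The selector swizzle 3 ^ ((c & 1) << 1 | c >> 1) swaps and inverts the two xxpermdi control bits, so the big-endian doubleword numbering lines up with little-endian expectations once the operands are swapped as well. A quick check of the mapping (illustrative):

    // Little-endian remap of the xxpermdi selector: {0, 1, 2, 3} -> {3, 1, 2, 0}
    int remap_selector(int c)
    {
        return 3 ^ ((c & 1) << 1 | c >> 1);
    }
    // remap_selector(0) == 3, remap_selector(1) == 1,
    // remap_selector(2) == 2, remap_selector(3) == 0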
+
+// converts between single and double-precision
+#ifndef vec_cvf
+    VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvf, __builtin_vsx_xvcvdpsp)
+    FORCE_INLINE(vec_double2) vec_cvf(const vec_float4& a)
+    { return __builtin_vsx_xvcvspdp(vec_sld(a, a, 4)); }
+#endif
+
+// converts 32 and 64 bit integers to double-precision
+#ifndef vec_ctd
+#   define vec_ctd(a, b) __vec_ctd(a)
+    VSX_IMPL_1RG(vec_double2, wd, vec_int4, wa, xvcvsxwdp, __vec_ctd)
+    VSX_IMPL_1RG(vec_double2, wd, vec_uint4, wa, xvcvuxwdp, __vec_ctd)
+    VSX_IMPL_1RG(vec_double2, wd, vec_dword2, wi, xvcvsxddp, __vec_ctd)
+    VSX_IMPL_1RG(vec_double2, wd, vec_udword2, wi, xvcvuxddp, __vec_ctd)
+#endif
+
+// shift left double by word immediate
+#ifndef vec_sldw
+#   define vec_sldw __builtin_vsx_xxsldwi
+#endif
+
+// just in case GCC doesn't define them
+#ifndef vec_xl
+#   define vec_xl vec_vsx_ld
+#   define vec_xst vec_vsx_st
+#endif
+
+#endif // GCC VSX compatibility
+
+/*
+ * CLANG VSX compatibility
+**/
+#if defined(__clang__) && !defined(__IBMCPP__)
+
+/*
+ * CLANG doesn't support %x<n> in inline asm templates, which fixes up the register number
+ * when any of the register constraints wa, wd, wf is used.
+ *
+ * For more explanation, check out PowerPC and IBM RS6000 in https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
+ * There's also an open bug: https://bugs.llvm.org/show_bug.cgi?id=31837
+ *
+ * So we can't use inline asm here and rely only on the built-in functions that CLANG supports.
+*/
+
+#if __clang_major__ < 5
+// implement vec_permi in a dirty way
+#   define VSX_IMPL_CLANG_4_PERMI(Tvec)                                                 \
+    FORCE_INLINE(Tvec) vec_permi(const Tvec& a, const Tvec& b, unsigned const char c)   \
+    {                                                                                   \
+        switch (c)                                                                      \
+        {                                                                               \
+        case 0:                                                                         \
+            return vec_mergeh(a, b);                                                    \
+        case 1:                                                                         \
+            return vec_mergel(vec_mergeh(a, a), b);                                     \
+        case 2:                                                                         \
+            return vec_mergeh(vec_mergel(a, a), b);                                     \
+        default:                                                                        \
+            return vec_mergel(a, b);                                                    \
+        }                                                                               \
+    }
+    VSX_IMPL_CLANG_4_PERMI(vec_udword2)
+    VSX_IMPL_CLANG_4_PERMI(vec_dword2)
+    VSX_IMPL_CLANG_4_PERMI(vec_double2)
+
+// vec_xxsldwi is missing in clang 4
+#   define vec_xxsldwi(a, b, c) vec_sld(a, b, (c) * 4)
+#else
+// vec_xxpermdi lacks little-endian support in clang 4, just like GCC 4
+#   define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ ((c & 1) << 1 | c >> 1)))
+#endif // __clang_major__ < 5
+
+// shift left double by word immediate
+#ifndef vec_sldw
+#   define vec_sldw vec_xxsldwi
+#endif
+
+/* converts between single and double precision */
+#ifndef vec_cvf
+    VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvf, __builtin_vsx_xvcvdpsp)
+    FORCE_INLINE(vec_double2) vec_cvf(const vec_float4& a)
+    { return __builtin_vsx_xvcvspdp(vec_sld(a, a, 4)); }
+#endif
+
+/* converts 32 and 64 bit integers to double-precision */
+#ifndef vec_ctd
+#   define vec_ctd(a, b) __vec_ctd(a)
+    VSX_REDIRECT_1RG(vec_double2, vec_int4, __vec_ctd, __builtin_vsx_xvcvsxwdp)
+    VSX_REDIRECT_1RG(vec_double2, vec_uint4, __vec_ctd, __builtin_vsx_xvcvuxwdp)
+    // implement vec_ctd for doublewords in a dirty way, since the builtins xvcvsxddp, xvcvuxddp are missing
+    // please try to avoid using it for doublewords
+    FORCE_INLINE(vec_double2) __vec_ctd(const vec_dword2& a)
+    { return vec_double2_set((double)vec_extract(a, 0), (double)vec_extract(a, 1)); }
+    FORCE_INLINE(vec_double2) __vec_ctd(const vec_udword2& a)
+    { return vec_double2_set((double)vec_extract(a, 0), (double)vec_extract(a, 1)); }
+#endif
+
+// Implement vec_rsqrt since clang only supports vec_rsqrte
+#ifndef vec_rsqrt
+    FORCE_INLINE(vec_float4) vec_rsqrt(const vec_float4& a)
+    { return vec_div(vec_float4_sp(1), vec_sqrt(a)); }
+
+    FORCE_INLINE(vec_double2) vec_rsqrt(const vec_double2& a)
+    { return vec_div(vec_double2_sp(1), vec_sqrt(a)); }
+#endif
+
+
+/*
+ * __builtin_altivec_vctsxs, which vec_cts relies on, is ambiguous in clang 5 and 6,
+ * so we just redefine vec_cts and cast explicitly
+*/
+#if __clang_major__ > 4
+#   undef vec_cts
+#   define vec_cts(__a, __b)                                                            \
+        _Generic((__a), vector float                                                    \
+           : (vector signed int)__builtin_altivec_vctsxs((__a), (__b)), vector double   \
+           : __extension__({                                                            \
+             vector double __ret =                                                      \
+                 (__a) *                                                                \
+                 (vector double)(vector unsigned long long)((0x3ffULL + (__b))          \
+                                                            << 52);                     \
+             __builtin_convertvector(__ret, vector signed long long);                   \
+           }))
+#endif // __clang_major__ > 4
+
+#endif // CLANG VSX compatibility
+
+/*
+ * implement vsx_ld(offset, pointer), vsx_st(vector, offset, pointer)
+ * load and store using an offset that depends on the pointer type
+ *
+ * implement vsx_ldf(offset, pointer), vsx_stf(vector, offset, pointer)
+ * load and store using an offset in fixed byte units
+ *
+ * Note: in clang, vec_xl and vec_xst fail to load from unaligned addresses,
+ * so we use vec_vsx_ld, vec_vsx_st instead
+*/
+
+#if defined(__clang__) && !defined(__IBMCPP__)
+#   define vsx_ldf  vec_vsx_ld
+#   define vsx_stf  vec_vsx_st
+#else // GCC , XLC
+#   define vsx_ldf  vec_xl
+#   define vsx_stf  vec_xst
+#endif
+
+#define VSX_OFFSET(o, p) ((o) * sizeof(*(p)))
+#define vsx_ld(o, p) vsx_ldf(VSX_OFFSET(o, p), p)
+#define vsx_st(v, o, p) vsx_stf(v, VSX_OFFSET(o, p), p)
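
Note that vsx_ld and vsx_st take an element index rather than a byte offset; VSX_OFFSET scales the index by the pointee size. A sketch, assuming CV_VSX and the header path this patch adds:

    #include "opencv2/core/vsx_utils.hpp"

    // Copies the 4 floats starting at element index 4 (byte offset 4 * sizeof(float) == 16).
    void copy_second_vector(const float* src, float* dst)
    {
        vec_float4 v = vsx_ld(4, src);
        vsx_st(v, 4, dst);
    }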
+
+/*
+ * implement vsx_ld2(offset, pointer), vsx_st2(vector, offset, pointer) to load and store doublewords
+ * In GCC, vec_xl and vec_xst map to vec_vsx_ld, vec_vsx_st, which don't support long long,
+ * and in CLANG we use vec_vsx_ld, vec_vsx_st because vec_xl, vec_xst fail on unaligned addresses
+ *
+ * In XLC, vec_xl and vec_xst fail to cast int64 (long int) to long long
+*/
+#if (defined(__GNUG__) || defined(__clang__)) && !defined(__IBMCPP__)
+    FORCE_INLINE(vec_udword2) vsx_ld2(long o, const uint64* p)
+    { return vec_udword2_c(vsx_ldf(VSX_OFFSET(o, p), (unsigned int*)p)); }
+
+    FORCE_INLINE(vec_dword2) vsx_ld2(long o, const int64* p)
+    { return vec_dword2_c(vsx_ldf(VSX_OFFSET(o, p), (int*)p)); }
+
+    FORCE_INLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p)
+    { vsx_stf(vec_uint4_c(vec), VSX_OFFSET(o, p), (unsigned int*)p); }
+
+    FORCE_INLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p)
+    { vsx_stf(vec_int4_c(vec), VSX_OFFSET(o, p), (int*)p); }
+#else // XLC
+    FORCE_INLINE(vec_udword2) vsx_ld2(long o, const uint64* p)
+    { return vsx_ldf(VSX_OFFSET(o, p), (unsigned long long*)p); }
+
+    FORCE_INLINE(vec_dword2) vsx_ld2(long o, const int64* p)
+    { return vsx_ldf(VSX_OFFSET(o, p), (long long*)p); }
+
+    FORCE_INLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p)
+    { vsx_stf(vec, VSX_OFFSET(o, p), (unsigned long long*)p); }
+
+    FORCE_INLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p)
+    { vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); }
+#endif
+
+#if defined(__clang__) || defined(__IBMCPP__)
+    // GCC can resolve the cast from long int on its own; in XLC and CLANG the overload is ambiguous
+    FORCE_INLINE(vec_udword2) vec_splats(uint64 v)
+    { return vec_splats((unsigned long long) v); }
+
+    FORCE_INLINE(vec_dword2) vec_splats(int64 v)
+    { return vec_splats((long long) v); }
+#endif
+
+// Implement store vector bool char for XLC
+#if defined(__IBMCPP__) && defined(__clang__)
+    FORCE_INLINE(void) vec_xst(const vec_bchar16 &vec, long o, uchar* p)
+    { vec_xst(vec_uchar16_c(vec), VSX_OFFSET(o, p), p); }
+#endif
+
+// Working around vec_popcnt compatibility
+/*
+ * vec_popcnt should return an unsigned type, but clang thinks otherwise, just like gcc does in vec_vpopcnt;
+ *
+ * use vec_popcntu instead to deal with it
+*/
+#if defined(__clang__) && !defined(__IBMCPP__)
+#   define VSX_IMPL_CLANG_POPCNTU(Tvec, Tvec2, ucast)   \
+    FORCE_INLINE(Tvec) vec_popcntu(const Tvec2& a)      \
+    { return ucast(vec_popcnt(a)); }
+
+    VSX_IMPL_CLANG_POPCNTU(vec_uchar16, vec_char16, vec_uchar16_c);
+    VSX_IMPL_CLANG_POPCNTU(vec_ushort8, vec_short8, vec_ushort8_c);
+    VSX_IMPL_CLANG_POPCNTU(vec_uint4, vec_int4, vec_uint4_c);
+    // redirect unsigned types
+    VSX_REDIRECT_1RG(vec_uchar16, vec_uchar16, vec_popcntu, vec_popcnt)
+    VSX_REDIRECT_1RG(vec_ushort8, vec_ushort8, vec_popcntu, vec_popcnt)
+    VSX_REDIRECT_1RG(vec_uint4, vec_uint4, vec_popcntu, vec_popcnt)
+#else
+#   define vec_popcntu vec_popcnt
+#endif
+
+// Working around vec_cts compatibility
+/*
+ * vec_cts in gcc and clang converts single-precision to signed fixed-point word
+ * and double-precision to signed doubleword; there is also no implementation of vec_ctsl
+ *
+ * vec_cts in xlc converts single and double precision to signed fixed-point word
+ * and xlc has vec_ctsl which converts single and double precision to signed doubleword
+ *
+ * so to deal with this situation, use vec_cts only if you want to convert single-precision to signed fixed-point word
+ * and use vec_ctsl when you want to convert double-precision to signed doubleword
+ *
+ * Also we implemented vec_ctsw(a) to convert double-precision to signed fixed-point word
+*/
+
+// converts double-precision to signed doubleword for GCC and CLANG
+#if !defined(vec_ctsl) && !defined(__IBMCPP__) && (defined(__GNUG__) || defined(__clang__))
+// GCC 4 produces incorrect results when converting to signed doubleword
+#   if !defined(__clang__) && __GNUG__ < 5
+#       define vec_ctsl(a, b) __vec_ctsl(a)
+        VSX_IMPL_1RG(vec_dword2, wi, vec_double2, wd, xvcvdpsxds, __vec_ctsl)
+#   else // GCC > 4 , CLANG
+#       define vec_ctsl vec_cts
+#   endif
+#endif
+
+// converts double-precision to signed fixed-point word
+#if defined(__IBMCPP__)
+#   define vec_ctsw(a) vec_cts(a, 0)
+#else // GCC, CLANG
+#   define vec_ctsw(a) vec_int4_c(__builtin_vsx_xvcvdpsxws(a))
+#endif
+
+// load 4 unsigned bytes into uint4 vector
+#define vec_ld_buw(p) vec_uint4_set((p)[0], (p)[1], (p)[2], (p)[3])
+
+// load 4 signed bytes into int4 vector
+#define vec_ld_bsw(p) vec_int4_set((p)[0], (p)[1], (p)[2], (p)[3])
+
+// load 4 unsigned bytes into float vector
+#define vec_ld_bps(p) vec_ctf(vec_ld_buw(p), 0)
+
+// Store the lower 8 bytes
+#define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0)
+
+// Store the upper 8 bytes
+#define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1)
+
+/*
+ * vec_ld_l8(ptr)  -> load 64 bits of data into the lower part
+ * vec_ldz_l8(ptr) -> load 64 bits of data into the lower part and zero the upper part
+**/
+#if defined(__clang__) && !defined(__IBMCPP__)
+#   define __VSX_LOAD_L8(Tvec, p) (Tvec)((vec_udword2)*((uint64*)(p)))
+#else
+#   define __VSX_LOAD_L8(Tvec, p) *((Tvec*)(p))
+#endif
+
+#define VSX_IMPL_LOAD_L8(Tvec, Tp)                                              \
+FORCE_INLINE(Tvec) vec_ld_l8(const Tp *p)                                       \
+{ return __VSX_LOAD_L8(Tvec, p); }                                              \
+FORCE_INLINE(Tvec) vec_ldz_l8(const Tp *p)                                      \
+{                                                                               \
+    static const vec_bdword2 mask = {0xFFFFFFFFFFFFFFFF, 0x0000000000000000};   \
+    return vec_and(vec_ld_l8(p), (Tvec)mask);                                   \
+}
+VSX_IMPL_LOAD_L8(vec_uchar16, uchar)
+VSX_IMPL_LOAD_L8(vec_char16, schar)
+VSX_IMPL_LOAD_L8(vec_ushort8, ushort)
+VSX_IMPL_LOAD_L8(vec_short8, short)
+VSX_IMPL_LOAD_L8(vec_uint4, uint)
+VSX_IMPL_LOAD_L8(vec_int4, int)
+VSX_IMPL_LOAD_L8(vec_float4, float)
+VSX_IMPL_LOAD_L8(vec_udword2, uint64)
+VSX_IMPL_LOAD_L8(vec_dword2, int64)
+VSX_IMPL_LOAD_L8(vec_double2, double)
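
A sketch pairing vec_ld_l8 with the vec_st_l8 helper above, assuming CV_VSX. Note that the non-clang path of vec_ld_l8 dereferences a whole vector, so the source buffer needs at least 16 readable bytes:

    #include "opencv2/core/vsx_utils.hpp"

    // Copies exactly 8 bytes through a vector register.
    void copy8(const uchar* src, uchar* dst)
    {
        vec_uchar16 v = vec_ld_l8(src); // the 8 bytes land in the lower half; don't rely on the upper half
        vec_st_l8(v, dst);              // writes only the lower 8 bytes
    }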
+
+// bitwise not
+#define vec_not(a) vec_nor(a, a)
+
+// POWER9 compatibility
+// not equal
+#ifndef vec_cmpne
+#   define vec_cmpne(a, b) vec_not(vec_cmpeq(a, b))
+#endif
+
+// absolute difference
+#ifndef vec_absd
+#   define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b))
+#endif
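
The max/min formulation sidesteps the wrap-around that a plain a - b would produce for unsigned lanes. The scalar identity it implements (illustrative):

    // |a - b| for unsigned operands without intermediate wrap-around.
    unsigned absd(unsigned a, unsigned b)
    {
        return (a > b ? a : b) - (a < b ? a : b);
    }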
+
+/*
+ * Implement vec_unpacklu and vec_unpackhu
+ * since vec_unpackl, vec_unpackh only support signed integers
+**/
+#define VSX_IMPL_UNPACKU(rt, rg, zero)                  \
+FORCE_INLINE(rt) vec_unpacklu(const rg& a)              \
+{ return reinterpret_cast<rt>(vec_mergel(a, zero)); }   \
+FORCE_INLINE(rt) vec_unpackhu(const rg& a)              \
+{ return reinterpret_cast<rt>(vec_mergeh(a, zero));  }
+
+VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z)
+VSX_IMPL_UNPACKU(vec_uint4, vec_ushort8, vec_ushort8_z)
+VSX_IMPL_UNPACKU(vec_udword2, vec_uint4, vec_uint4_z)
+
+/*
+ * Implement vec_mergesqe and vec_mergesqo,
+ * which merge the even- and odd-indexed elements of two vectors in sequence
+*/
+// 16
+#define perm16_mergesqe 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+#define perm16_mergesqo 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+VSX_IMPL_PERM(vec_uchar16, vec_mergesqe, perm16_mergesqe)
+VSX_IMPL_PERM(vec_uchar16, vec_mergesqo, perm16_mergesqo)
+VSX_IMPL_PERM(vec_char16, vec_mergesqe, perm16_mergesqe)
+VSX_IMPL_PERM(vec_char16, vec_mergesqo, perm16_mergesqo)
+// 8
+#define perm8_mergesqe 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+#define perm8_mergesqo 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
+VSX_IMPL_PERM(vec_ushort8, vec_mergesqe, perm8_mergesqe)
+VSX_IMPL_PERM(vec_ushort8, vec_mergesqo, perm8_mergesqo)
+VSX_IMPL_PERM(vec_short8, vec_mergesqe, perm8_mergesqe)
+VSX_IMPL_PERM(vec_short8, vec_mergesqo, perm8_mergesqo)
+// 4
+#define perm4_mergesqe 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+#define perm4_mergesqo 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+VSX_IMPL_PERM(vec_uint4, vec_mergesqe, perm4_mergesqe)
+VSX_IMPL_PERM(vec_uint4, vec_mergesqo, perm4_mergesqo)
+VSX_IMPL_PERM(vec_int4, vec_mergesqe, perm4_mergesqe)
+VSX_IMPL_PERM(vec_int4, vec_mergesqo, perm4_mergesqo)
+VSX_IMPL_PERM(vec_float4, vec_mergesqe, perm4_mergesqe)
+VSX_IMPL_PERM(vec_float4, vec_mergesqo, perm4_mergesqo)
+// 2
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqe, vec_mergeh)
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqo, vec_mergel)
+VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqe, vec_mergeh)
+VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqo, vec_mergel)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqe, vec_mergeh)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqo, vec_mergel)
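
vec_mergesqe and vec_mergesqo select the even- or odd-indexed elements from the concatenation of their two inputs, which is exactly the deinterleave step used by the channel helpers further down. A usage sketch, assuming CV_VSX and little-endian element numbering:

    #include "opencv2/core/vsx_utils.hpp"

    // With v0 = {c0, c1, c0, c1, ...} and v1 continuing the stream,
    // vec_mergesqe picks positions 0, 2, 4, ... and vec_mergesqo picks 1, 3, 5, ...
    void split2(const uchar* interleaved, vec_uchar16& ch0, vec_uchar16& ch1)
    {
        vec_uchar16 v0 = vsx_ld(0, interleaved);
        vec_uchar16 v1 = vsx_ld(16, interleaved);
        ch0 = vec_mergesqe(v0, v1); // even positions -> channel 0
        ch1 = vec_mergesqo(v0, v1); // odd positions  -> channel 1
    }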
+
+/*
+ * Implement vec_mergesqh and vec_mergesql,
+ * which merge the most and least significant halves of two vectors in sequence
+*/
+#define VSX_IMPL_MERGESQHL(Tvec)                                    \
+FORCE_INLINE(Tvec) vec_mergesqh(const Tvec& a, const Tvec& b)       \
+{ return (Tvec)vec_mergeh(vec_udword2_c(a), vec_udword2_c(b)); }    \
+FORCE_INLINE(Tvec) vec_mergesql(const Tvec& a, const Tvec& b)       \
+{ return (Tvec)vec_mergel(vec_udword2_c(a), vec_udword2_c(b)); }
+VSX_IMPL_MERGESQHL(vec_uchar16)
+VSX_IMPL_MERGESQHL(vec_char16)
+VSX_IMPL_MERGESQHL(vec_ushort8)
+VSX_IMPL_MERGESQHL(vec_short8)
+VSX_IMPL_MERGESQHL(vec_uint4)
+VSX_IMPL_MERGESQHL(vec_int4)
+VSX_IMPL_MERGESQHL(vec_float4)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqh, vec_mergeh)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesql, vec_mergel)
+VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqh, vec_mergeh)
+VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesql, vec_mergel)
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqh, vec_mergeh)
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesql, vec_mergel)
+
+
+// 2- and 4-channel interleave for all types except 2-lane ones
+#define VSX_IMPL_ST_INTERLEAVE(Tp, Tvec)                                    \
+FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr) \
+{                                                                           \
+    vsx_stf(vec_mergeh(a, b), 0, ptr);                                      \
+    vsx_stf(vec_mergel(a, b), 16, ptr);                                     \
+}                                                                           \
+FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,          \
+                                     const Tvec& c, const Tvec& d, Tp* ptr) \
+{                                                                           \
+    Tvec ac = vec_mergeh(a, c);                                             \
+    Tvec bd = vec_mergeh(b, d);                                             \
+    vsx_stf(vec_mergeh(ac, bd), 0, ptr);                                    \
+    vsx_stf(vec_mergel(ac, bd), 16, ptr);                                   \
+    ac = vec_mergel(a, c);                                                  \
+    bd = vec_mergel(b, d);                                                  \
+    vsx_stf(vec_mergeh(ac, bd), 32, ptr);                                   \
+    vsx_stf(vec_mergel(ac, bd), 48, ptr);                                   \
+}
+VSX_IMPL_ST_INTERLEAVE(uchar, vec_uchar16)
+VSX_IMPL_ST_INTERLEAVE(schar, vec_char16)
+VSX_IMPL_ST_INTERLEAVE(ushort, vec_ushort8)
+VSX_IMPL_ST_INTERLEAVE(short, vec_short8)
+VSX_IMPL_ST_INTERLEAVE(uint, vec_uint4)
+VSX_IMPL_ST_INTERLEAVE(int, vec_int4)
+VSX_IMPL_ST_INTERLEAVE(float, vec_float4)
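
A usage sketch of the 2-channel interleave store, assuming CV_VSX (pointer names are illustrative and n is assumed to be a multiple of the lane count):

    #include "opencv2/core/vsx_utils.hpp"

    // Packs two float planes into one interleaved buffer:
    // out = {p0[0], p1[0], p0[1], p1[1], ...}
    void interleave2(const float* plane0, const float* plane1, float* out, int n)
    {
        for (int i = 0; i < n; i += 4)
            vec_st_interleave(vsx_ld(i, plane0), vsx_ld(i, plane1), out + i * 2);
    }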
+
+// 2- and 4-channel deinterleave for 16-lane types
+#define VSX_IMPL_ST_DINTERLEAVE_8(Tp, Tvec)                                 \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)     \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(16, ptr);                                              \
+    a = vec_mergesqe(v0, v1);                                               \
+    b = vec_mergesqo(v0, v1);                                               \
+}                                                                           \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,     \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(16, ptr);                                              \
+    Tvec v2 = vsx_ld(32, ptr);                                              \
+    Tvec v3 = vsx_ld(48, ptr);                                              \
+    Tvec m0 = vec_mergesqe(v0, v1);                                         \
+    Tvec m1 = vec_mergesqe(v2, v3);                                         \
+    a = vec_mergesqe(m0, m1);                                               \
+    c = vec_mergesqo(m0, m1);                                               \
+    m0 = vec_mergesqo(v0, v1);                                              \
+    m1 = vec_mergesqo(v2, v3);                                              \
+    b = vec_mergesqe(m0, m1);                                               \
+    d = vec_mergesqo(m0, m1);                                               \
+}
+VSX_IMPL_ST_DINTERLEAVE_8(uchar, vec_uchar16)
+VSX_IMPL_ST_DINTERLEAVE_8(schar, vec_char16)
+
+// 2- and 4-channel deinterleave for 8-lane types
+#define VSX_IMPL_ST_DINTERLEAVE_16(Tp, Tvec)                                \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)     \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(8, ptr);                                               \
+    a = vec_mergesqe(v0, v1);                                               \
+    b = vec_mergesqo(v0, v1);                                               \
+}                                                                           \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,     \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(8, ptr);                                               \
+    Tvec m0 = vec_mergeh(v0, v1);                                           \
+    Tvec m1 = vec_mergel(v0, v1);                                           \
+    Tvec ab0 = vec_mergeh(m0, m1);                                          \
+    Tvec cd0 = vec_mergel(m0, m1);                                          \
+    v0 = vsx_ld(16, ptr);                                                   \
+    v1 = vsx_ld(24, ptr);                                                   \
+    m0 = vec_mergeh(v0, v1);                                                \
+    m1 = vec_mergel(v0, v1);                                                \
+    Tvec ab1 = vec_mergeh(m0, m1);                                          \
+    Tvec cd1 = vec_mergel(m0, m1);                                          \
+    a = vec_mergesqh(ab0, ab1);                                             \
+    b = vec_mergesql(ab0, ab1);                                             \
+    c = vec_mergesqh(cd0, cd1);                                             \
+    d = vec_mergesql(cd0, cd1);                                             \
+}
+VSX_IMPL_ST_DINTERLEAVE_16(ushort, vec_ushort8)
+VSX_IMPL_ST_DINTERLEAVE_16(short, vec_short8)
+
+// 2- and 4-channel deinterleave for 4-lane types
+#define VSX_IMPL_ST_DINTERLEAVE_32(Tp, Tvec)                                \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)     \
+{                                                                           \
+    a = vsx_ld(0, ptr);                                                     \
+    b = vsx_ld(4, ptr);                                                     \
+    Tvec m0 = vec_mergeh(a, b);                                             \
+    Tvec m1 = vec_mergel(a, b);                                             \
+    a = vec_mergeh(m0, m1);                                                 \
+    b = vec_mergel(m0, m1);                                                 \
+}                                                                           \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,     \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(4, ptr);                                               \
+    Tvec v2 = vsx_ld(8, ptr);                                               \
+    Tvec v3 = vsx_ld(12, ptr);                                              \
+    Tvec m0 = vec_mergeh(v0, v2);                                           \
+    Tvec m1 = vec_mergeh(v1, v3);                                           \
+    a = vec_mergeh(m0, m1);                                                 \
+    b = vec_mergel(m0, m1);                                                 \
+    m0 = vec_mergel(v0, v2);                                                \
+    m1 = vec_mergel(v1, v3);                                                \
+    c = vec_mergeh(m0, m1);                                                 \
+    d = vec_mergel(m0, m1);                                                 \
+}
+VSX_IMPL_ST_DINTERLEAVE_32(uint, vec_uint4)
+VSX_IMPL_ST_DINTERLEAVE_32(int, vec_int4)
+VSX_IMPL_ST_DINTERLEAVE_32(float, vec_float4)
+
+// 2- and 4-channel interleave and deinterleave for 2-lane types
+#define VSX_IMPL_ST_D_INTERLEAVE_64(Tp, Tvec, ld_func, st_func)             \
+FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr) \
+{                                                                           \
+    st_func(vec_mergeh(a, b), 0, ptr);                                      \
+    st_func(vec_mergel(a, b), 2, ptr);                                      \
+}                                                                           \
+FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,          \
+                                     const Tvec& c, const Tvec& d, Tp* ptr) \
+{                                                                           \
+    st_func(vec_mergeh(a, b), 0, ptr);                                      \
+    st_func(vec_mergel(a, b), 2, ptr);                                      \
+    st_func(vec_mergeh(c, d), 4, ptr);                                      \
+    st_func(vec_mergel(c, d), 6, ptr);                                      \
+}                                                                           \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)     \
+{                                                                           \
+    Tvec m0 = ld_func(0, ptr);                                              \
+    Tvec m1 = ld_func(2, ptr);                                              \
+    a = vec_mergeh(m0, m1);                                                 \
+    b = vec_mergel(m0, m1);                                                 \
+}                                                                           \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,     \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = ld_func(0, ptr);                                              \
+    Tvec v1 = ld_func(2, ptr);                                              \
+    a = vec_mergeh(v0, v1);                                                 \
+    b = vec_mergel(v0, v1);                                                 \
+    v0 = ld_func(4, ptr);                                                   \
+    v1 = ld_func(6, ptr);                                                   \
+    c = vec_mergeh(v0, v1);                                                 \
+    d = vec_mergel(v0, v1);                                                 \
+}
+VSX_IMPL_ST_D_INTERLEAVE_64(int64, vec_dword2, vsx_ld2, vsx_st2)
+VSX_IMPL_ST_D_INTERLEAVE_64(uint64, vec_udword2, vsx_ld2, vsx_st2)
+VSX_IMPL_ST_D_INTERLEAVE_64(double, vec_double2, vsx_ld, vsx_st)
+
+/* 3 channels */
+#define VSX_IMPL_ST_INTERLEAVE_3CH_16(Tp, Tvec)                                                   \
+FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,                                \
+                                     const Tvec& c, Tp* ptr)                                      \
+{                                                                                                 \
+    static const vec_uchar16 a12 = {0, 16, 0, 1, 17, 0, 2, 18, 0, 3, 19, 0, 4, 20, 0, 5};         \
+    static const vec_uchar16 a123 = {0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15};    \
+    vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr);                                       \
+    static const vec_uchar16 b12 = {21, 0, 6, 22, 0, 7, 23, 0, 8, 24, 0, 9, 25, 0, 10, 26};       \
+    static const vec_uchar16 b123 = {0, 21, 2, 3, 22, 5, 6, 23, 8, 9, 24, 11, 12, 25, 14, 15};    \
+    vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 16, ptr);                                      \
+    static const vec_uchar16 c12 = {0, 11, 27, 0, 12, 28, 0, 13, 29, 0, 14, 30, 0, 15, 31, 0};    \
+    static const vec_uchar16 c123 = {26, 1, 2, 27, 4, 5, 28, 7, 8, 29, 10, 11, 30, 13, 14, 31};   \
+    vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 32, ptr);                                      \
+}                                                                                                 \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c)                  \
+{                                                                                                 \
+    Tvec v1 = vsx_ld(0, ptr);                                                                     \
+    Tvec v2 = vsx_ld(16, ptr);                                                                    \
+    Tvec v3 = vsx_ld(32, ptr);                                                                    \
+    static const vec_uchar16 a12_perm = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 0, 0, 0, 0, 0};  \
+    static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 20, 23, 26, 29};  \
+    a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm);                                      \
+    static const vec_uchar16 b12_perm = {1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 0, 0, 0, 0, 0}; \
+    static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 21, 24, 27, 30};  \
+    b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm);                                      \
+    static const vec_uchar16 c12_perm = {2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 0, 0, 0, 0, 0};  \
+    static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 19, 22, 25, 28, 31};  \
+    c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm);                                      \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_16(uchar, vec_uchar16)
+VSX_IMPL_ST_INTERLEAVE_3CH_16(schar, vec_char16)
+
+#define VSX_IMPL_ST_INTERLEAVE_3CH_8(Tp, Tvec)                                                    \
+FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,                                \
+                                     const Tvec& c, Tp* ptr)                                      \
+{                                                                                                 \
+    static const vec_uchar16 a12 = {0, 1, 16, 17, 0, 0, 2, 3, 18, 19, 0, 0, 4, 5, 20, 21};        \
+    static const vec_uchar16 a123 = {0, 1, 2, 3, 16, 17, 6, 7, 8, 9, 18, 19, 12, 13, 14, 15};     \
+    vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr);                                       \
+    static const vec_uchar16 b12 = {0, 0, 6, 7, 22, 23, 0, 0, 8, 9, 24, 25, 0, 0, 10, 11};        \
+    static const vec_uchar16 b123 = {20, 21, 2, 3, 4, 5, 22, 23, 8, 9, 10, 11, 24, 25, 14, 15};   \
+    vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 8, ptr);                                       \
+    static const vec_uchar16 c12 = {26, 27, 0, 0, 12, 13, 28, 29, 0, 0, 14, 15, 30, 31, 0, 0};    \
+    static const vec_uchar16 c123 = {0, 1, 26, 27, 4, 5, 6, 7, 28, 29, 10, 11, 12, 13, 30, 31};   \
+    vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 16, ptr);                                      \
+}                                                                                                 \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c)                  \
+{                                                                                                 \
+    Tvec v1 = vsx_ld(0, ptr);                                                                     \
+    Tvec v2 = vsx_ld(8, ptr);                                                                     \
+    Tvec v3 = vsx_ld(16, ptr);                                                                    \
+    static const vec_uchar16 a12_perm = {0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 0, 0, 0, 0}; \
+    static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 26, 27};  \
+    a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm);                                      \
+    static const vec_uchar16 b12_perm = {2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 0, 0, 0, 0, 0, 0};   \
+    static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 22, 23, 28, 29};  \
+    b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm);                                      \
+    static const vec_uchar16 c12_perm = {4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 0, 0, 0, 0, 0, 0}; \
+    static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 19, 24, 25, 30, 31};  \
+    c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm);                                      \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_8(ushort, vec_ushort8)
+VSX_IMPL_ST_INTERLEAVE_3CH_8(short, vec_short8)
+
+#define VSX_IMPL_ST_INTERLEAVE_3CH_4(Tp, Tvec)                                                     \
+FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,                                 \
+                                     const Tvec& c, Tp* ptr)                                       \
+{                                                                                                  \
+    Tvec hbc = vec_mergeh(b, c);                                                                   \
+    static const vec_uchar16 ahbc = {0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 4, 5, 6, 7};      \
+    vsx_st(vec_perm(a, hbc, ahbc), 0, ptr);                                                        \
+    Tvec lab = vec_mergel(a, b);                                                                   \
+    vsx_st(vec_sld(lab, hbc, 8), 4, ptr);                                                          \
+    static const vec_uchar16 clab = {8, 9, 10, 11, 24, 25, 26, 27, 28, 29, 30, 31, 12, 13, 14, 15};\
+    vsx_st(vec_perm(c, lab, clab), 8, ptr);                                                        \
+}                                                                                                  \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c)                   \
+{                                                                                                  \
+    Tvec v1 = vsx_ld(0, ptr);                                                                      \
+    Tvec v2 = vsx_ld(4, ptr);                                                                      \
+    Tvec v3 = vsx_ld(8, ptr);                                                                      \
+    static const vec_uchar16 flp = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31};   \
+    a = vec_perm(v1, vec_sld(v3, v2, 8), flp);                                                     \
+    static const vec_uchar16 flp2 = {28, 29, 30, 31, 0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19};  \
+    b = vec_perm(v2, vec_sld(v1, v3, 8), flp2);                                                    \
+    c = vec_perm(vec_sld(v2, v1, 8), v3, flp);                                                     \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_4(uint, vec_uint4)
+VSX_IMPL_ST_INTERLEAVE_3CH_4(int, vec_int4)
+VSX_IMPL_ST_INTERLEAVE_3CH_4(float, vec_float4)
+
+#define VSX_IMPL_ST_INTERLEAVE_3CH_2(Tp, Tvec, ld_func, st_func)     \
+FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,   \
+                                     const Tvec& c, Tp* ptr)         \
+{                                                                    \
+    st_func(vec_mergeh(a, b), 0, ptr);                               \
+    st_func(vec_permi(c, a, 1), 2, ptr);                             \
+    st_func(vec_mergel(b, c), 4, ptr);                               \
+}                                                                    \
+FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a,       \
+                                       Tvec& b, Tvec& c)             \
+{                                                                    \
+    Tvec v1 = ld_func(0, ptr);                                       \
+    Tvec v2 = ld_func(2, ptr);                                       \
+    Tvec v3 = ld_func(4, ptr);                                       \
+    a = vec_permi(v1, v2, 1);                                        \
+    b = vec_permi(v1, v3, 2);                                        \
+    c = vec_permi(v2, v3, 1);                                        \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_2(int64, vec_dword2, vsx_ld2, vsx_st2)
+VSX_IMPL_ST_INTERLEAVE_3CH_2(uint64, vec_udword2, vsx_ld2, vsx_st2)
+VSX_IMPL_ST_INTERLEAVE_3CH_2(double, vec_double2, vsx_ld, vsx_st)
+
+#endif // CV_VSX
+
+//! @}
+
+#endif // OPENCV_HAL_VSX_UTILS_HPP
index b4406c4..4572153 100644 (file)
@@ -1368,29 +1368,25 @@ struct InRange_SIMD
     }
 };
 
-#if CV_SSE2
+#if CV_SIMD128
 
 template <>
 struct InRange_SIMD<uchar>
 {
     int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
-                     uchar * dst, int len) const
+        uchar * dst, int len) const
     {
         int x = 0;
+        const int width = v_uint8x16::nlanes;
 
-        if (USE_SSE2)
+        for (; x <= len - width; x += width)
         {
-            __m128i v_full = _mm_set1_epi8(-1), v_128 = _mm_set1_epi8(-128);
+            v_uint8x16 values = v_load(src1 + x);
+            v_uint8x16 low = v_load(src2 + x);
+            v_uint8x16 high = v_load(src3 + x);
 
-            for ( ; x <= len - 16; x += 16 )
-            {
-                __m128i v_src = _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src1 + x)), v_128);
-                __m128i v_mask1 = _mm_cmpgt_epi8(_mm_add_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_128), v_src);
-                __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src3 + x)), v_128));
-                _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full));
-            }
+            v_store(dst + x, (values >= low) & (high >= values));
         }
-
         return x;
     }
 };
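
The rewritten body relies on OpenCV's universal intrinsics: a comparison such as values >= low yields an all-ones/all-zeros mask per lane, and & combines the two range tests, so one portable loop now replaces both the SSE2 and the NEON specializations. A standalone sketch of the same pattern with a scalar tail, assuming CV_SIMD128 (the function name is illustrative):

    #include "opencv2/core/hal/intrin.hpp"
    using namespace cv;

    // dst[i] = 0xFF where low[i] <= src[i] <= high[i], else 0.
    void inRange8u(const uchar* src, const uchar* low, const uchar* high, uchar* dst, int len)
    {
        int x = 0;
        for (; x <= len - v_uint8x16::nlanes; x += v_uint8x16::nlanes)
        {
            v_uint8x16 v = v_load(src + x);
            v_store(dst + x, (v >= v_load(low + x)) & (v_load(high + x) >= v));
        }
        for (; x < len; x++) // scalar tail for the remainder
            dst[x] = (uchar)-(low[x] <= src[x] && src[x] <= high[x]);
    }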
@@ -1399,23 +1395,19 @@ template <>
 struct InRange_SIMD<schar>
 {
     int operator () (const schar * src1, const schar * src2, const schar * src3,
-                     uchar * dst, int len) const
+        uchar * dst, int len) const
     {
         int x = 0;
+        const int width = v_int8x16::nlanes;
 
-        if (USE_SSE2)
+        for (; x <= len - width; x += width)
         {
-            __m128i v_full = _mm_set1_epi8(-1);
+            v_int8x16 values = v_load(src1 + x);
+            v_int8x16 low = v_load(src2 + x);
+            v_int8x16 high = v_load(src3 + x);
 
-            for ( ; x <= len - 16; x += 16 )
-            {
-                __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
-                __m128i v_mask1 = _mm_cmpgt_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src);
-                __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_loadu_si128((const __m128i *)(src3 + x)));
-                _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full));
-            }
+            v_store((schar*)(dst + x), (values >= low) & (high >= values));
         }
-
         return x;
     }
 };
@@ -1424,181 +1416,22 @@ template <>
 struct InRange_SIMD<ushort>
 {
     int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
-                     uchar * dst, int len) const
-    {
-        int x = 0;
-
-        if (USE_SSE2)
-        {
-            __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1), v_32768 = _mm_set1_epi16(-32768);
-
-            for ( ; x <= len - 8; x += 8 )
-            {
-                __m128i v_src = _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src1 + x)), v_32768);
-                __m128i v_mask1 = _mm_cmpgt_epi16(_mm_add_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_32768), v_src);
-                __m128i v_mask2 = _mm_cmpgt_epi16(v_src, _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src3 + x)), v_32768));
-                __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full);
-                _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero));
-            }
-        }
-
-        return x;
-    }
-};
-
-template <>
-struct InRange_SIMD<short>
-{
-    int operator () (const short * src1, const short * src2, const short * src3,
-                     uchar * dst, int len) const
-    {
-        int x = 0;
-
-        if (USE_SSE2)
-        {
-            __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1);
-
-            for ( ; x <= len - 8; x += 8 )
-            {
-                __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
-                __m128i v_mask1 = _mm_cmpgt_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src);
-                __m128i v_mask2 = _mm_cmpgt_epi16(v_src, _mm_loadu_si128((const __m128i *)(src3 + x)));
-                __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full);
-                _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero));
-            }
-        }
-
-        return x;
-    }
-};
-
-template <>
-struct InRange_SIMD<int>
-{
-    int operator () (const int * src1, const int * src2, const int * src3,
-                     uchar * dst, int len) const
+        uchar * dst, int len) const
     {
         int x = 0;
+        const int width = v_uint16x8::nlanes * 2;
 
-        if (USE_SSE2)
+        for (; x <= len - width; x += width)
         {
-            __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi32(-1);
+            v_uint16x8 values1 = v_load(src1 + x);
+            v_uint16x8 low1 = v_load(src2 + x);
+            v_uint16x8 high1 = v_load(src3 + x);
 
-            for ( ; x <= len - 8; x += 8 )
-            {
-                __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
-                __m128i v_res1 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src),
-                    _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x))));
-
-                v_src = _mm_loadu_si128((const __m128i *)(src1 + x + 4));
-                __m128i v_res2 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x + 4)), v_src),
-                    _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x + 4))));
-
-                __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(_mm_andnot_si128(v_res1, v_full), 16),
-                                                _mm_srli_epi32(_mm_andnot_si128(v_res2, v_full), 16));
-                _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero));
-            }
-        }
-
-        return x;
-    }
-};
-
-template <>
-struct InRange_SIMD<float>
-{
-    int operator () (const float * src1, const float * src2, const float * src3,
-                     uchar * dst, int len) const
-    {
-        int x = 0;
-
-        if (USE_SSE2)
-        {
-            __m128i v_zero = _mm_setzero_si128();
-
-            for ( ; x <= len - 8; x += 8 )
-            {
-                __m128 v_src = _mm_loadu_ps(src1 + x);
-                __m128 v_res1 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x), v_src),
-                    _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x)));
+            v_uint16x8 values2 = v_load(src1 + x + v_uint16x8::nlanes);
+            v_uint16x8 low2 = v_load(src2 + x + v_uint16x8::nlanes);
+            v_uint16x8 high2 = v_load(src3 + x + v_uint16x8::nlanes);
 
-                v_src = _mm_loadu_ps(src1 + x + 4);
-                __m128 v_res2 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x + 4), v_src),
-                    _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x + 4)));
-
-                __m128i v_res1i = _mm_cvtps_epi32(v_res1), v_res2i = _mm_cvtps_epi32(v_res2);
-                __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(v_res1i, 16), _mm_srli_epi32(v_res2i, 16));
-                _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero));
-            }
-        }
-
-        return x;
-    }
-};
-
-#elif CV_NEON
-
-template <>
-struct InRange_SIMD<uchar>
-{
-    int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
-                     uchar * dst, int len) const
-    {
-        int x = 0;
-
-        for ( ; x <= len - 16; x += 16 )
-        {
-            uint8x16_t values = vld1q_u8(src1 + x);
-            uint8x16_t low = vld1q_u8(src2 + x);
-            uint8x16_t high = vld1q_u8(src3 + x);
-
-            vst1q_u8(dst + x, vandq_u8(vcgeq_u8(values, low), vcgeq_u8(high, values)));
-        }
-        return x;
-    }
-};
-
-template <>
-struct InRange_SIMD<schar>
-{
-    int operator () (const schar * src1, const schar * src2, const schar * src3,
-                     uchar * dst, int len) const
-    {
-        int x = 0;
-
-        for ( ; x <= len - 16; x += 16 )
-        {
-            int8x16_t values = vld1q_s8(src1 + x);
-            int8x16_t low = vld1q_s8(src2 + x);
-            int8x16_t high = vld1q_s8(src3 + x);
-
-            vst1q_u8(dst + x, vandq_u8(vcgeq_s8(values, low), vcgeq_s8(high, values)));
-        }
-        return x;
-    }
-};
-
-template <>
-struct InRange_SIMD<ushort>
-{
-    int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
-                     uchar * dst, int len) const
-    {
-        int x = 0;
-
-        for ( ; x <= len - 16; x += 16 )
-        {
-            uint16x8_t values = vld1q_u16((const uint16_t*)(src1 + x));
-            uint16x8_t low = vld1q_u16((const uint16_t*)(src2 + x));
-            uint16x8_t high = vld1q_u16((const uint16_t*)(src3 + x));
-            uint8x8_t  r1 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
-
-            values = vld1q_u16((const uint16_t*)(src1 + x + 8));
-            low = vld1q_u16((const uint16_t*)(src2 + x + 8));
-            high = vld1q_u16((const uint16_t*)(src3 + x + 8));
-            uint8x8_t  r2 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
-
-            vst1q_u8(dst + x, vcombine_u8(r1, r2));
+            v_store(dst + x, v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)));
         }
         return x;
     }
@@ -1608,23 +1441,22 @@ template <>
 struct InRange_SIMD<short>
 {
     int operator () (const short * src1, const short * src2, const short * src3,
-                     uchar * dst, int len) const
+        uchar * dst, int len) const
     {
         int x = 0;
+        const int width = (int)v_int16x8::nlanes * 2;
 
-        for ( ; x <= len - 16; x += 16 )
+        for (; x <= len - width; x += width)
         {
-            int16x8_t values = vld1q_s16((const int16_t*)(src1 + x));
-            int16x8_t low = vld1q_s16((const int16_t*)(src2 + x));
-            int16x8_t high = vld1q_s16((const int16_t*)(src3 + x));
-            uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
+            v_int16x8 values1 = v_load(src1 + x);
+            v_int16x8 low1 = v_load(src2 + x);
+            v_int16x8 high1 = v_load(src3 + x);
 
-            values = vld1q_s16((const int16_t*)(src1 + x + 8));
-            low = vld1q_s16((const int16_t*)(src2 + x + 8));
-            high = vld1q_s16((const int16_t*)(src3 + x + 8));
-            uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
+            v_int16x8 values2 = v_load(src1 + x + v_int16x8::nlanes);
+            v_int16x8 low2 = v_load(src2 + x + v_int16x8::nlanes);
+            v_int16x8 high2 = v_load(src3 + x + v_int16x8::nlanes);
 
-            vst1q_u8(dst + x, vcombine_u8(r1, r2));
+            v_store((schar*)(dst + x), v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)));
         }
         return x;
     }
@@ -1634,27 +1466,22 @@ template <>
 struct InRange_SIMD<int>
 {
     int operator () (const int * src1, const int * src2, const int * src3,
-                     uchar * dst, int len) const
+        uchar * dst, int len) const
     {
         int x = 0;
+        const int width = (int)v_int32x4::nlanes * 2;
 
-        for ( ; x <= len - 8; x += 8 )
+        for (; x <= len - width; x += width)
         {
-            int32x4_t values = vld1q_s32((const int32_t*)(src1 + x));
-            int32x4_t low = vld1q_s32((const int32_t*)(src2 + x));
-            int32x4_t high = vld1q_s32((const int32_t*)(src3 + x));
-
-            uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
-
-            values = vld1q_s32((const int32_t*)(src1 + x + 4));
-            low = vld1q_s32((const int32_t*)(src2 + x + 4));
-            high = vld1q_s32((const int32_t*)(src3 + x + 4));
-
-            uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
+            v_int32x4 values1 = v_load(src1 + x);
+            v_int32x4 low1 = v_load(src2 + x);
+            v_int32x4 high1 = v_load(src3 + x);
 
-            uint16x8_t res_16 = vcombine_u16(r1, r2);
+            v_int32x4 values2 = v_load(src1 + x + v_int32x4::nlanes);
+            v_int32x4 low2 = v_load(src2 + x + v_int32x4::nlanes);
+            v_int32x4 high2 = v_load(src3 + x + v_int32x4::nlanes);
 
-            vst1_u8(dst + x, vmovn_u16(res_16));
+            v_pack_store(dst + x, v_reinterpret_as_u16(v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))));
         }
         return x;
     }
@@ -1664,27 +1491,22 @@ template <>
 struct InRange_SIMD<float>
 {
     int operator () (const float * src1, const float * src2, const float * src3,
-                     uchar * dst, int len) const
+        uchar * dst, int len) const
     {
         int x = 0;
+        const int width = (int)v_float32x4::nlanes * 2;
 
-        for ( ; x <= len - 8; x += 8 )
+        for (; x <= len - width; x += width)
         {
-            float32x4_t values = vld1q_f32((const float32_t*)(src1 + x));
-            float32x4_t low = vld1q_f32((const float32_t*)(src2 + x));
-            float32x4_t high = vld1q_f32((const float32_t*)(src3 + x));
+            v_float32x4 values1 = v_load(src1 + x);
+            v_float32x4 low1 = v_load(src2 + x);
+            v_float32x4 high1 = v_load(src3 + x);
 
-            uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
+            v_float32x4 values2 = v_load(src1 + x + v_float32x4::nlanes);
+            v_float32x4 low2 = v_load(src2 + x + v_float32x4::nlanes);
+            v_float32x4 high2 = v_load(src3 + x + v_float32x4::nlanes);
 
-            values = vld1q_f32((const float32_t*)(src1 + x + 4));
-            low = vld1q_f32((const float32_t*)(src2 + x + 4));
-            high = vld1q_f32((const float32_t*)(src3 + x + 4));
-
-            uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
-
-            uint16x8_t res_16 = vcombine_u16(r1, r2);
-
-            vst1_u8(dst + x, vmovn_u16(res_16));
+            v_pack_store(dst + x, v_pack(v_reinterpret_as_u32((values1 >= low1) & (high1 >= values1)), v_reinterpret_as_u32((values2 >= low2) & (high2 >= values2))));
         }
         return x;
     }
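The hunks above replace per-architecture NEON/SSE paths with OpenCV's universal intrinsics: compare against both bounds, AND the lane masks, then narrow to one 0x00/0xFF byte per element. A minimal standalone sketch of the float case (illustrative helper, assuming the universal-intrinsics header):

    #include <opencv2/core/hal/intrin.hpp>
    using namespace cv;
    // dst[i] = (lo[i] <= v[i] && v[i] <= hi[i]) ? 0xFF : 0x00, for 8 floats
    static void inRange8f(const float* v, const float* lo, const float* hi, uchar* dst)
    {
        v_float32x4 a = v_load(v), b = v_load(v + 4);
        v_uint32x4 m1 = v_reinterpret_as_u32((a >= v_load(lo)) & (v_load(hi) >= a));
        v_uint32x4 m2 = v_reinterpret_as_u32((b >= v_load(lo + 4)) & (v_load(hi + 4) >= b));
        v_pack_store(dst, v_pack(m1, m2)); // u32 masks -> u16 -> saturated u8 store
    }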
@@ -1823,7 +1645,7 @@ static bool ocl_inRange( InputArray _src, InputArray _lowerb,
     {
         if( !checkScalar(_lowerb, stype, lkind, skind) )
             CV_Error( CV_StsUnmatchedSizes,
-                     "The lower bounary is neither an array of the same size and same type as src, nor a scalar");
+                     "The lower boundary is neither an array of the same size and same type as src, nor a scalar");
         lbScalar = true;
     }
 
@@ -1832,7 +1654,7 @@ static bool ocl_inRange( InputArray _src, InputArray _lowerb,
     {
         if( !checkScalar(_upperb, stype, ukind, skind) )
             CV_Error( CV_StsUnmatchedSizes,
-                     "The upper bounary is neither an array of the same size and same type as src, nor a scalar");
+                     "The upper boundary is neither an array of the same size and same type as src, nor a scalar");
         ubScalar = true;
     }
 
@@ -1944,7 +1766,7 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
     {
         if( !checkScalar(lb, src.type(), lkind, skind) )
             CV_Error( CV_StsUnmatchedSizes,
-                     "The lower bounary is neither an array of the same size and same type as src, nor a scalar");
+                     "The lower boundary is neither an array of the same size and same type as src, nor a scalar");
         lbScalar = true;
     }
 
@@ -1953,7 +1775,7 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
     {
         if( !checkScalar(ub, src.type(), ukind, skind) )
             CV_Error( CV_StsUnmatchedSizes,
-                     "The upper bounary is neither an array of the same size and same type as src, nor a scalar");
+                     "The upper boundary is neither an array of the same size and same type as src, nor a scalar");
         ubScalar = true;
     }
 
@@ -2829,33 +2651,17 @@ void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
         for( ; height--; src1 += step1, src2 += step2, dst += step )
         {
             int x =0;
-            #if CV_SSE2
-            if( USE_SSE2 )
+#if CV_SIMD128
+            if( hasSIMD128() )
             {
-                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
-                __m128i c128 = _mm_set1_epi8 (-128);
-                for( ; x <= width - 16; x += 16 )
-                {
-                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
-                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
-                    // no simd for 8u comparison, that's why we need the trick
-                    r00 = _mm_sub_epi8(r00,c128);
-                    r10 = _mm_sub_epi8(r10,c128);
-
-                    r00 =_mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128);
-                    _mm_storeu_si128((__m128i*)(dst + x),r00);
+                v_uint8x16 mask = v_setall_u8((uchar)m);
 
+                for( ; x <= width - v_uint8x16::nlanes; x += v_uint8x16::nlanes )
+                {
+                    v_store(dst + x, (v_load(src1 + x) > v_load(src2 + x)) ^ mask);
                 }
             }
-            #elif CV_NEON
-            uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
-
-            for( ; x <= width - 16; x += 16 )
-            {
-                vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
-            }
-
-           #endif
+#endif
 
             for( ; x < width; x++ ){
                 dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
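Note how the scalar tail mirrors the vector body: -(a > b) is 0xFF or 0x00, and the XOR with m (0 for CMP_GT, 0xFF for the inverted code) lets one loop serve both predicates. The same trick in isolation (illustrative helper):

    #include <opencv2/core/hal/intrin.hpp>
    using namespace cv;
    // 16 bytes of (a > b) masks; pass m = 0xFF to get the negation (a <= b)
    static void cmpGt16u8(const uchar* a, const uchar* b, uchar* dst, uchar m)
    {
        v_store(dst, (v_load(a) > v_load(b)) ^ v_setall_u8(m));
    }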
@@ -2868,26 +2674,17 @@ void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
         for( ; height--; src1 += step1, src2 += step2, dst += step )
         {
             int x = 0;
-            #if CV_SSE2
-            if( USE_SSE2 )
+#if CV_SIMD128
+            if( hasSIMD128() )
             {
-                __m128i m128 =  code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
-                for( ; x <= width - 16; x += 16 )
+                v_uint8x16 mask = v_setall_u8((uchar)m);
+
+                for( ; x <= width - v_uint8x16::nlanes; x += v_uint8x16::nlanes )
                 {
-                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
-                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
-                    r00 = _mm_xor_si128 ( _mm_cmpeq_epi8 (r00, r10), m128);
-                    _mm_storeu_si128((__m128i*)(dst + x), r00);
+                    v_store(dst+x, (v_load(src1+x) == v_load(src2+x)) ^ mask);
                 }
             }
-            #elif CV_NEON
-            uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
-
-            for( ; x <= width - 16; x += 16 )
-            {
-                vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
-            }
-           #endif
+#endif
            for( ; x < width; x++ )
                 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
         }
@@ -2932,49 +2729,26 @@ void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
         for( ; height--; src1 += step1, src2 += step2, dst += step )
         {
             int x =0;
-            #if CV_SSE2
-            if( USE_SSE2)
+#if CV_SIMD128
+            if( hasSIMD128() )
             {
-                __m128i m128 =  code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
-                for( ; x <= width - 16; x += 16 )
-                {
-                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
-                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
-                    r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
-                    __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
-                    __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
-                    r01 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r01, r11), m128);
-                    r11 = _mm_packs_epi16(r00, r01);
-                    _mm_storeu_si128((__m128i*)(dst + x), r11);
-                }
-                if( x <= width-8)
-                {
-                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
-                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
-                    r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
-                    r10 = _mm_packs_epi16(r00, r00);
-                    _mm_storel_epi64((__m128i*)(dst + x), r10);
-
-                    x += 8;
-                }
-            }
-            #elif CV_NEON
-            uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
+                v_uint8x16 mask = v_setall_u8((uchar)m);
+                const int dWidth = v_uint8x16::nlanes;
 
-            for( ; x <= width - 16; x += 16 )
-            {
-                int16x8_t in1 = vld1q_s16(src1 + x);
-                int16x8_t in2 = vld1q_s16(src2 + x);
-                uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2));
+                for( ; x <= width - dWidth; x += dWidth )
+                {
+                    v_int16x8 in1 = v_load(src1 + x);
+                    v_int16x8 in2 = v_load(src2 + x);
+                    v_uint16x8 t1 = v_reinterpret_as_u16(in1 > in2);
 
-                in1 = vld1q_s16(src1 + x + 8);
-                in2 = vld1q_s16(src2 + x + 8);
-                uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2));
+                    in1 = v_load(src1 + x + v_uint16x8::nlanes);
+                    in2 = v_load(src2 + x + v_uint16x8::nlanes);
+                    v_uint16x8 t2 = v_reinterpret_as_u16(in1 > in2);
 
-                vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
+                    v_store(dst+x, (v_pack(t1, t2)) ^ mask);
+                }
             }
-            #endif
-
+#endif
             for( ; x < width; x++ ){
                  dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
             }
@@ -2986,48 +2760,26 @@ void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
         for( ; height--; src1 += step1, src2 += step2, dst += step )
         {
             int x = 0;
-            #if CV_SSE2
-            if( USE_SSE2 )
+#if CV_SIMD128
+            if( hasSIMD128() )
             {
-                __m128i m128 =  code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
-                for( ; x <= width - 16; x += 16 )
-                {
-                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
-                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
-                    r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
-                    __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
-                    __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
-                    r01 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r01, r11), m128);
-                    r11 = _mm_packs_epi16(r00, r01);
-                    _mm_storeu_si128((__m128i*)(dst + x), r11);
-                }
-                if( x <= width - 8)
-                {
-                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
-                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
-                    r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
-                    r10 = _mm_packs_epi16(r00, r00);
-                    _mm_storel_epi64((__m128i*)(dst + x), r10);
+                v_uint8x16 mask = v_setall_u8((uchar)m);
+                const int dWidth = v_uint8x16::nlanes;
 
-                    x += 8;
-                }
-            }
-            #elif CV_NEON
-            uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
-
-            for( ; x <= width - 16; x += 16 )
-            {
-                int16x8_t in1 = vld1q_s16(src1 + x);
-                int16x8_t in2 = vld1q_s16(src2 + x);
-                uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2));
+                for( ; x <= width - dWidth; x += dWidth )
+                {
+                    v_int16x8 in1 = v_load(src1 + x);
+                    v_int16x8 in2 = v_load(src2 + x);
+                    v_uint16x8 t1 = v_reinterpret_as_u16(in1 == in2);
 
-                in1 = vld1q_s16(src1 + x + 8);
-                in2 = vld1q_s16(src2 + x + 8);
-                uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2));
+                    in1 = v_load(src1 + x + v_uint16x8::nlanes);
+                    in2 = v_load(src2 + x + v_uint16x8::nlanes);
+                    v_uint16x8 t2 = v_reinterpret_as_u16(in1 == in2);
 
-                vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
+                    v_store(dst+x, v_pack(t1, t2) ^ mask);
+                }
             }
-            #endif
+#endif
             for( ; x < width; x++ )
                 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
         }
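The u16 reinterpret plus v_pack is safe here because a 16-bit comparison lane is either 0xFFFF or 0, and the unsigned pack saturates 0xFFFF to 0xFF. In isolation (illustrative helper):

    #include <opencv2/core/hal/intrin.hpp>
    using namespace cv;
    static v_uint8x16 eq_masks_to_u8(const v_int16x8& a, const v_int16x8& b,
                                     const v_int16x8& c, const v_int16x8& d)
    {
        v_uint16x8 t1 = v_reinterpret_as_u16(a == b); // lanes are 0xFFFF or 0x0000
        v_uint16x8 t2 = v_reinterpret_as_u16(c == d);
        return v_pack(t1, t2);                        // lanes become 0xFF or 0x00
    }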
@@ -3280,60 +3032,34 @@ addWeighted8u( const uchar* src1, size_t step1,
     {
         int x = 0;
 
-#if CV_SSE2
-        if( USE_SSE2 )
+#if CV_SIMD128
+        if( hasSIMD128() )
         {
-            __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma);
-            __m128i z = _mm_setzero_si128();
+            v_float32x4 g = v_setall_f32(gamma);
+            v_float32x4 a = v_setall_f32(alpha);
+            v_float32x4 b = v_setall_f32(beta);
 
-            for( ; x <= width - 8; x += 8 )
+            for( ; x <= width - v_uint16x8::nlanes; x += v_uint16x8::nlanes )
             {
-                __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z);
-                __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z);
-
-                __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z));
-                __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z));
-                __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z));
-                __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z));
-
-                u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4));
-                u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4));
-                u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4);
-
-                u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1));
-                u = _mm_packus_epi16(u, u);
-
-                _mm_storel_epi64((__m128i*)(dst + x), u);
+                v_uint16x8 in1_16 = v_load_expand(src1 + x);
+                v_int32x4 in1_32_l, in1_32_h;
+                v_expand(v_reinterpret_as_s16(in1_16), in1_32_l, in1_32_h);
+                v_float32x4 in1_f_l = v_cvt_f32(in1_32_l);
+                v_float32x4 in1_f_h = v_cvt_f32(in1_32_h);
+
+                v_uint16x8 in2_16 = v_load_expand(src2 + x);
+                v_int32x4 in2_32_l, in2_32_h;
+                v_expand(v_reinterpret_as_s16(in2_16), in2_32_l, in2_32_h);
+                v_float32x4 in2_f_l = v_cvt_f32(in2_32_l);
+                v_float32x4 in2_f_h = v_cvt_f32(in2_32_h);
+
+                v_int32x4 out_l = v_round(in1_f_l * a + in2_f_l * b + g);
+                v_int32x4 out_h = v_round(in1_f_h * a + in2_f_h * b + g);
+
+                v_int16x8 out_16 = v_pack(out_l, out_h);
+                v_pack_u_store(dst + x, out_16);
             }
         }
-#elif CV_NEON
-        float32x4_t g = vdupq_n_f32 (gamma);
-
-        for( ; x <= width - 8; x += 8 )
-        {
-            uint8x8_t in1 = vld1_u8(src1+x);
-            uint16x8_t in1_16 = vmovl_u8(in1);
-            float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16)));
-            float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16)));
-
-            uint8x8_t in2 = vld1_u8(src2+x);
-            uint16x8_t in2_16 = vmovl_u8(in2);
-            float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16)));
-            float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16)));
-
-            float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta));
-            float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta));
-            out_f_l = vaddq_f32(out_f_l, g);
-            out_f_h = vaddq_f32(out_f_h, g);
-
-            uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l));
-            uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h));
-
-            uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h);
-            uint8x8_t out = vqmovn_u16(out_16);
-
-            vst1_u8(dst+x, out);
-        }
 #endif
         #if CV_ENABLE_UNROLLED
         for( ; x <= width - 4; x += 4 )
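The rewritten addWeighted8u keeps the classic widen-compute-narrow shape: expand u8 to s32 via u16, convert to float, blend with alpha/beta/gamma, round, and pack back with unsigned saturation. One 8-pixel block as a standalone sketch (illustrative names):

    #include <opencv2/core/hal/intrin.hpp>
    using namespace cv;
    static void blend8u(const uchar* s1, const uchar* s2, uchar* dst,
                        v_float32x4 a, v_float32x4 b, v_float32x4 g)
    {
        v_int32x4 l1, h1, l2, h2;
        v_expand(v_reinterpret_as_s16(v_load_expand(s1)), l1, h1); // u8 -> u16 -> s32
        v_expand(v_reinterpret_as_s16(v_load_expand(s2)), l2, h2);
        v_int32x4 lo = v_round(v_cvt_f32(l1) * a + v_cvt_f32(l2) * b + g);
        v_int32x4 hi = v_round(v_cvt_f32(h1) * a + v_cvt_f32(h2) * b + g);
        v_pack_u_store(dst, v_pack(lo, hi)); // s32 -> s16, then saturate to u8
    }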
index 02bd04a..a75b924 100644 (file)
@@ -69,9 +69,14 @@ static const char* get_type_name(int type)
     return "unknown";
 }
 
+// std::tolower is int->int
+static char char_tolower(char ch)
+{
+    return (char)std::tolower((int)ch);
+}
 static bool parse_bool(std::string str)
 {
-    std::transform(str.begin(), str.end(), str.begin(), ::tolower);
+    std::transform(str.begin(), str.end(), str.begin(), char_tolower);
     std::istringstream is(str);
     bool b;
     is >> (str.size() > 1 ? std::boolalpha : std::noboolalpha) >> b;
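The wrapper exists because std::tolower is int(int): calling it through std::transform on plain char is undefined for negative values on signed-char platforms, and taking the address of ::tolower is ambiguous once the <locale> overload is visible. Usage is unchanged:

    static std::string to_lower_copy(std::string s)
    {
        std::transform(s.begin(), s.end(), s.begin(), char_tolower);
        return s; // "TRUE" -> "true"
    }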
index 20f0604..bf356ec 100644 (file)
@@ -863,45 +863,49 @@ private:
         d = alloc_1d<double> (n);
         e = alloc_1d<double> (n);
         ort = alloc_1d<double> (n);
-        // Reduce to Hessenberg form.
-        orthes();
-        // Reduce Hessenberg to real Schur form.
-        hqr2();
-        // Copy eigenvalues to OpenCV Matrix.
-        _eigenvalues.create(1, n, CV_64FC1);
-        for (int i = 0; i < n; i++) {
-            _eigenvalues.at<double> (0, i) = d[i];
+        try {
+            // Reduce to Hessenberg form.
+            orthes();
+            // Reduce Hessenberg to real Schur form.
+            hqr2();
+            // Copy eigenvalues to OpenCV Matrix.
+            _eigenvalues.create(1, n, CV_64FC1);
+            for (int i = 0; i < n; i++) {
+                _eigenvalues.at<double> (0, i) = d[i];
+            }
+            // Copy eigenvectors to OpenCV Matrix.
+            _eigenvectors.create(n, n, CV_64FC1);
+            for (int i = 0; i < n; i++)
+                for (int j = 0; j < n; j++)
+                    _eigenvectors.at<double> (i, j) = V[i][j];
+            // Deallocate the memory by releasing all internal working data.
+            release();
+        }
+        catch (...)
+        {
+            release();
+            throw;
         }
-        // Copy eigenvectors to OpenCV Matrix.
-        _eigenvectors.create(n, n, CV_64FC1);
-        for (int i = 0; i < n; i++)
-            for (int j = 0; j < n; j++)
-                _eigenvectors.at<double> (i, j) = V[i][j];
-        // Deallocate the memory by releasing all internal working data.
-        release();
     }
 
 public:
-    EigenvalueDecomposition()
-        : n(0), cdivr(0), cdivi(0), d(0), e(0), ort(0), V(0), H(0) {}
-
     // Initializes & computes the Eigenvalue Decomposition for a general matrix
     // given in src. This function is a port of the EigenvalueSolver in JAMA,
     // which has been released to public domain by The MathWorks and the
     // National Institute of Standards and Technology (NIST).
-    EigenvalueDecomposition(InputArray src) {
-        compute(src);
+    EigenvalueDecomposition(InputArray src, bool fallbackSymmetric = true) {
+        compute(src, fallbackSymmetric);
     }
 
     // This function computes the Eigenvalue Decomposition for a general matrix
     // given in src. This function is a port of the EigenvalueSolver in JAMA,
     // which has been released to public domain by The MathWorks and the
     // National Institute of Standards and Technology (NIST).
-    void compute(InputArray src)
+    void compute(InputArray src, bool fallbackSymmetric)
     {
         CV_INSTRUMENT_REGION()
 
-        if(isSymmetric(src)) {
+        if(fallbackSymmetric && isSymmetric(src)) {
             // Fall back to OpenCV for a symmetric matrix!
             cv::eigen(src, _eigenvalues, _eigenvectors);
         } else {
@@ -930,11 +934,60 @@ public:
     ~EigenvalueDecomposition() {}
 
     // Returns the eigenvalues of the Eigenvalue Decomposition.
-    Mat eigenvalues() {    return _eigenvalues; }
+    Mat eigenvalues() const { return _eigenvalues; }
     // Returns the eigenvectors of the Eigenvalue Decomposition.
-    Mat eigenvectors() { return _eigenvectors; }
+    Mat eigenvectors() const { return _eigenvectors; }
 };
 
+void eigenNonSymmetric(InputArray _src, OutputArray _evals, OutputArray _evects)
+{
+    CV_INSTRUMENT_REGION()
+
+    Mat src = _src.getMat();
+    int type = src.type();
+    size_t n = (size_t)src.rows;
+
+    CV_Assert(src.rows == src.cols);
+    CV_Assert(type == CV_32F || type == CV_64F);
+
+    Mat src64f;
+    if (type == CV_32F)
+        src.convertTo(src64f, CV_64FC1);
+    else
+        src64f = src;
+
+    EigenvalueDecomposition eigensystem(src64f, false);
+
+    // EigenvalueDecomposition returns eigenvalues unsorted and eigenvectors transposed
+    std::vector<double> eigenvalues64f;
+    eigensystem.eigenvalues().copyTo(eigenvalues64f);
+    CV_Assert(eigenvalues64f.size() == n);
+
+    std::vector<int> sort_indexes(n);
+    cv::sortIdx(eigenvalues64f, sort_indexes, SORT_EVERY_ROW | SORT_DESCENDING);
+
+    std::vector<double> sorted_eigenvalues64f(n);
+    for (size_t i = 0; i < n; i++) sorted_eigenvalues64f[i] = eigenvalues64f[sort_indexes[i]];
+
+    Mat(sorted_eigenvalues64f).convertTo(_evals, type);
+
+    if( _evects.needed() )
+    {
+        Mat eigenvectors64f = eigensystem.eigenvectors().t(); // transpose
+        CV_Assert((size_t)eigenvectors64f.rows == n);
+        CV_Assert((size_t)eigenvectors64f.cols == n);
+        Mat_<double> sorted_eigenvectors64f((int)n, (int)n, CV_64FC1);
+        for (size_t i = 0; i < n; i++)
+        {
+            double* pDst = sorted_eigenvectors64f.ptr<double>((int)i);
+            double* pSrc = eigenvectors64f.ptr<double>(sort_indexes[(int)i]);
+            CV_Assert(pSrc != NULL);
+            memcpy(pDst, pSrc, n * sizeof(double));
+        }
+        sorted_eigenvectors64f.convertTo(_evects, type);
+    }
+}
+
 
 //------------------------------------------------------------------------------
 // Linear Discriminant Analysis implementation
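A hedged usage sketch of the new cv::eigenNonSymmetric entry point; eigenvalues come back sorted in descending order and, as with cv::eigen, eigenvectors are stored as rows:

    cv::Mat A = (cv::Mat_<double>(2, 2) << 0,  1,
                                          -2, -3); // non-symmetric; eigenvalues -1, -2
    cv::Mat evals, evects;
    cv::eigenNonSymmetric(A, evals, evects);       // evals = (-1, -2); the rows of
                                                   // evects are the matching eigenvectors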
index 2a6673d..094b196 100644 (file)
@@ -484,21 +484,31 @@ Mat::Mat(const Mat& m, const Range& _rowRange, const Range& _colRange)
     }
 
     *this = m;
-    if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
+    try
     {
-        CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
-        rows = _rowRange.size();
-        data += step*_rowRange.start;
-        flags |= SUBMATRIX_FLAG;
-    }
+        if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
+        {
+            CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end
+                       && _rowRange.end <= m.rows );
+            rows = _rowRange.size();
+            data += step*_rowRange.start;
+            flags |= SUBMATRIX_FLAG;
+        }
 
-    if( _colRange != Range::all() && _colRange != Range(0,cols) )
+        if( _colRange != Range::all() && _colRange != Range(0,cols) )
+        {
+            CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end
+                       && _colRange.end <= m.cols );
+            cols = _colRange.size();
+            data += _colRange.start*elemSize();
+            flags &= cols < m.cols ? ~CONTINUOUS_FLAG : -1;
+            flags |= SUBMATRIX_FLAG;
+        }
+    }
+    catch(...)
     {
-        CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
-        cols = _colRange.size();
-        data += _colRange.start*elemSize();
-        flags &= cols < m.cols ? ~CONTINUOUS_FLAG : -1;
-        flags |= SUBMATRIX_FLAG;
+        release();
+        throw;
     }
 
     if( rows == 1 )
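The new try/catch matters because `*this = m` has already taken a reference on m's data before the range checks run; if a CV_Assert throws, release() now drops that reference instead of leaking it. An assumed failure case:

    cv::Mat m(10, 10, CV_8U);
    try {
        cv::Mat roi(m, cv::Range(5, 50), cv::Range::all()); // end > m.rows -> throws
    } catch (const cv::Exception&) {
        // the half-built header released its reference inside the constructor
    }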
@@ -1140,78 +1150,45 @@ int Mat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) con
     ? (int)(total()*channels()/_elemChannels) : -1;
 }
 
+template <typename T> static inline
+void scalarToRawData(const Scalar& s, T * const buf, const int cn, const int unroll_to)
+{
+    int i = 0;
+    for(; i < cn; i++)
+        buf[i] = saturate_cast<T>(s.val[i]);
+    for(; i < unroll_to; i++)
+        buf[i] = buf[i-cn];
+}
 
 void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
 {
     CV_INSTRUMENT_REGION()
 
-    int i, depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
     CV_Assert(cn <= 4);
     switch(depth)
     {
     case CV_8U:
-        {
-        uchar* buf = (uchar*)_buf;
-        for(i = 0; i < cn; i++)
-            buf[i] = saturate_cast<uchar>(s.val[i]);
-        for(; i < unroll_to; i++)
-            buf[i] = buf[i-cn];
-        }
+        scalarToRawData<uchar>(s, (uchar*)_buf, cn, unroll_to);
         break;
     case CV_8S:
-        {
-        schar* buf = (schar*)_buf;
-        for(i = 0; i < cn; i++)
-            buf[i] = saturate_cast<schar>(s.val[i]);
-        for(; i < unroll_to; i++)
-            buf[i] = buf[i-cn];
-        }
+        scalarToRawData<schar>(s, (schar*)_buf, cn, unroll_to);
         break;
     case CV_16U:
-        {
-        ushort* buf = (ushort*)_buf;
-        for(i = 0; i < cn; i++)
-            buf[i] = saturate_cast<ushort>(s.val[i]);
-        for(; i < unroll_to; i++)
-            buf[i] = buf[i-cn];
-        }
+        scalarToRawData<ushort>(s, (ushort*)_buf, cn, unroll_to);
         break;
     case CV_16S:
-        {
-        short* buf = (short*)_buf;
-        for(i = 0; i < cn; i++)
-            buf[i] = saturate_cast<short>(s.val[i]);
-        for(; i < unroll_to; i++)
-            buf[i] = buf[i-cn];
-        }
+        scalarToRawData<short>(s, (short*)_buf, cn, unroll_to);
         break;
     case CV_32S:
-        {
-        int* buf = (int*)_buf;
-        for(i = 0; i < cn; i++)
-            buf[i] = saturate_cast<int>(s.val[i]);
-        for(; i < unroll_to; i++)
-            buf[i] = buf[i-cn];
-        }
+        scalarToRawData<int>(s, (int*)_buf, cn, unroll_to);
         break;
     case CV_32F:
-        {
-        float* buf = (float*)_buf;
-        for(i = 0; i < cn; i++)
-            buf[i] = saturate_cast<float>(s.val[i]);
-        for(; i < unroll_to; i++)
-            buf[i] = buf[i-cn];
-        }
+        scalarToRawData<float>(s, (float*)_buf, cn, unroll_to);
         break;
     case CV_64F:
-        {
-        double* buf = (double*)_buf;
-        for(i = 0; i < cn; i++)
-            buf[i] = saturate_cast<double>(s.val[i]);
-        for(; i < unroll_to; i++)
-            buf[i] = buf[i-cn];
+        scalarToRawData<double>(s, (double*)_buf, cn, unroll_to);
         break;
-        }
     default:
         CV_Error(CV_StsUnsupportedFormat,"");
     }
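The seven per-depth copies collapse into one template; the second loop replicates the first cn values until unroll_to slots are filled. Illustrative effect (this is an internal helper, not public API):

    double buf[4];
    cv::scalarToRawData(cv::Scalar(1.5, 2.5), buf, CV_64FC2, 4);
    // buf is now { 1.5, 2.5, 1.5, 2.5 }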
index 8fea1d2..b523308 100644 (file)
 #include <inttypes.h>
 #endif
 
+#include <opencv2/core/utils/configuration.private.hpp>
+
 #include "opencv2/core/ocl_genbase.hpp"
+#include "opencl_kernels_core.hpp"
 
 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
 #define CV_OPENCL_SHOW_RUN_ERRORS       0
@@ -993,6 +996,11 @@ static cl_device_id selectOpenCLDevice()
     return NULL;
 }
 #else
+// std::tolower is int->int
+static char char_tolower(char ch)
+{
+    return (char)std::tolower((int)ch);
+}
 static cl_device_id selectOpenCLDevice()
 {
     std::string platform, deviceName;
@@ -1077,7 +1085,7 @@ static cl_device_id selectOpenCLDevice()
     {
         int deviceType = 0;
         std::string tempStrDeviceType = deviceTypes[t];
-        std::transform( tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), tolower );
+        std::transform(tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), char_tolower);
 
         if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
             deviceType = Device::TYPE_GPU;
@@ -1840,9 +1848,35 @@ void initializeContextFromHandle(Context& ctx, void* platform, void* _context, v
 
 struct Queue::Impl
 {
-    Impl(const Context& c, const Device& d)
+    inline void __init()
     {
         refcount = 1;
+        handle = 0;
+        isProfilingQueue_ = false;
+    }
+
+    Impl(cl_command_queue q)
+    {
+        __init();
+        handle = q;
+
+        cl_command_queue_properties props = 0;
+        cl_int result = clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL);
+        CV_Assert(result == CL_SUCCESS && "clGetCommandQueueInfo(CL_QUEUE_PROPERTIES)");
+        isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
+    }
+
+    Impl(cl_command_queue q, bool isProfilingQueue)
+    {
+        __init();
+        handle = q;
+        isProfilingQueue_ = isProfilingQueue;
+    }
+
+    Impl(const Context& c, const Device& d, bool withProfiling = false)
+    {
+        __init();
+
         const Context* pc = &c;
         cl_context ch = (cl_context)pc->ptr();
         if( !ch )
@@ -1854,8 +1888,10 @@ struct Queue::Impl
         if( !dh )
             dh = (cl_device_id)pc->device(0).ptr();
         cl_int retval = 0;
-        handle = clCreateCommandQueue(ch, dh, 0, &retval);
+        cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
+        handle = clCreateCommandQueue(ch, dh, props, &retval);
         CV_OclDbgAssert(retval == CL_SUCCESS);
+        isProfilingQueue_ = withProfiling;
     }
 
     ~Impl()
@@ -1873,9 +1909,37 @@ struct Queue::Impl
         }
     }
 
+    const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
+    {
+        if (isProfilingQueue_)
+            return self;
+
+        if (profiling_queue_.ptr())
+            return profiling_queue_;
+
+        cl_context ctx = 0;
+        CV_Assert(CL_SUCCESS == clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
+
+        cl_device_id device = 0;
+        CV_Assert(CL_SUCCESS == clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
+
+        cl_int result = CL_SUCCESS;
+        cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
+        cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
+        CV_Assert(result == CL_SUCCESS && "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
+
+        Queue queue;
+        queue.p = new Impl(q, true);
+        profiling_queue_ = queue;
+
+        return profiling_queue_;
+    }
+
     IMPLEMENT_REFCOUNTABLE();
 
     cl_command_queue handle;
+    bool isProfilingQueue_;
+    cv::ocl::Queue profiling_queue_;
 };
 
 Queue::Queue()
@@ -1929,6 +1993,12 @@ void Queue::finish()
     }
 }
 
+const Queue& Queue::getProfilingQueue() const
+{
+    CV_Assert(p);
+    return p->getProfilingQueue(*this);
+}
+
 void* Queue::ptr() const
 {
     return p ? p->handle : 0;
@@ -2032,6 +2102,9 @@ struct Kernel::Impl
         release();
     }
 
+    bool run(int dims, size_t _globalsize[], size_t _localsize[],
+            bool sync, int64* timeNS, const Queue& q);
+
     ~Impl()
     {
         if(handle)
@@ -2259,19 +2332,15 @@ int Kernel::set(int i, const KernelArg& arg)
     return i+1;
 }
 
-
 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
                  bool sync, const Queue& q)
 {
-    CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
-
-    if(!p || !p->handle || p->isInProgress)
+    if (!p)
         return false;
 
-    cl_command_queue qq = getQueue(q);
     size_t globalsize[CV_MAX_DIM] = {1,1,1};
     size_t total = 1;
-    CV_Assert(_globalsize != 0);
+    CV_Assert(_globalsize != NULL);
     for (int i = 0; i < dims; i++)
     {
         size_t val = _localsize ? _localsize[i] :
@@ -2283,12 +2352,28 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
         globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
     }
     CV_Assert(total > 0);
-    if( p->haveTempDstUMats )
+
+    return p->run(dims, globalsize, _localsize, sync, NULL, q);
+}
+
+
+bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
+        bool sync, int64* timeNS, const Queue& q)
+{
+    CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
+
+    if (!handle || isInProgress)
+        return false;
+
+    cl_command_queue qq = getQueue(q);
+    if (haveTempDstUMats)
+        sync = true;
+    if (timeNS)
         sync = true;
     cl_event asyncEvent = 0;
-    cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
-                                           NULL, globalsize, _localsize, 0, 0,
-                                           sync ? 0 : &asyncEvent);
+    cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
+                                           NULL, globalsize, localsize, 0, 0,
+                                           (sync && !timeNS) ? 0 : &asyncEvent);
 #if CV_OPENCL_SHOW_RUN_ERRORS
     if (retval != CL_SUCCESS)
     {
@@ -2296,16 +2381,31 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
         fflush(stdout);
     }
 #endif
-    if( sync || retval != CL_SUCCESS )
+    if (sync || retval != CL_SUCCESS)
     {
         CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
-        p->cleanupUMats();
+        if (timeNS)
+        {
+            if (retval == CL_SUCCESS)
+            {
+                clWaitForEvents(1, &asyncEvent);
+                cl_ulong startTime, stopTime;
+                CV_Assert(CL_SUCCESS == clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
+                CV_Assert(CL_SUCCESS == clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
+                *timeNS = (int64)(stopTime - startTime);
+            }
+            else
+            {
+                *timeNS = -1;
+            }
+        }
+        cleanupUMats();
     }
     else
     {
-        p->addref();
-        p->isInProgress = true;
-        CV_OclDbgAssert(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
+        addref();
+        isInProgress = true;
+        CV_OclDbgAssert(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this) == CL_SUCCESS);
     }
     if (asyncEvent)
         clReleaseEvent(asyncEvent);
@@ -2336,6 +2436,17 @@ bool Kernel::runTask(bool sync, const Queue& q)
     return retval == CL_SUCCESS;
 }
 
+int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
+{
+    CV_Assert(p && p->handle && !p->isInProgress);
+    Queue q = q_.ptr() ? q_ : Queue::getDefault();
+    CV_Assert(q.ptr());
+    q.finish(); // call clFinish() on base queue
+    Queue profilingQueue = q.getProfilingQueue();
+    int64 timeNs = -1;
+    bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
+    return res ? timeNs : -1;
+}
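A hedged usage sketch of the new profiling entry point (the kernel source and argument names here are assumptions): runProfiling finishes the base queue, re-runs the kernel synchronously on a CL_QUEUE_PROFILING_ENABLE queue, and returns the event's start/end delta in nanoseconds, or -1 on failure.

    cv::ocl::Kernel k("my_kernel", cv::ocl::ProgramSource(kernelSrc)); // assumed source
    k.args(cv::ocl::KernelArg::PtrReadOnly(src), cv::ocl::KernelArg::PtrWriteOnly(dst));
    size_t gsize[] = { (size_t)n };
    int64 ns = k.runProfiling(1, gsize, NULL, cv::ocl::Queue::getDefault());
    printf("kernel time: %.3f ms\n", ns >= 0 ? ns * 1e-6 : -1.0);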
 
 size_t Kernel::workGroupSize() const
 {
@@ -2378,25 +2489,156 @@ size_t Kernel::localMemSize() const
                                     sizeof(val), &val, &retsz) == CL_SUCCESS ? (size_t)val : 0;
 }
 
+
+
+///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
+
+struct ProgramSource::Impl
+{
+    Impl(const String& src)
+    {
+        init(cv::String(), cv::String(), src, cv::String());
+    }
+    Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
+    {
+        init(module, name, codeStr, codeHash);
+    }
+    void init(const String& module, const String& name, const String& codeStr, const String& codeHash)
+    {
+        refcount = 1;
+        module_ = module;
+        name_ = name;
+        codeStr_ = codeStr;
+        codeHash_ = codeHash;
+
+        isHashUpdated = false;
+        if (codeHash_.empty())
+        {
+            updateHash();
+            codeHash_ = cv::format("%08llx", hash_);
+        }
+    }
+
+    void updateHash()
+    {
+        hash_ = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
+        isHashUpdated = true;
+    }
+
+    IMPLEMENT_REFCOUNTABLE();
+
+    String module_;
+    String name_;
+    String codeStr_;
+    String codeHash_;
+    // TODO std::vector<ProgramSource> includes_;
+
+    bool isHashUpdated;
+    ProgramSource::hash_t hash_;
+};
+
+
+ProgramSource::ProgramSource()
+{
+    p = 0;
+}
+
+ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
+{
+    p = new Impl(module, name, codeStr, codeHash);
+}
+
+ProgramSource::ProgramSource(const char* prog)
+{
+    p = new Impl(prog);
+}
+
+ProgramSource::ProgramSource(const String& prog)
+{
+    p = new Impl(prog);
+}
+
+ProgramSource::~ProgramSource()
+{
+    if(p)
+        p->release();
+}
+
+ProgramSource::ProgramSource(const ProgramSource& prog)
+{
+    p = prog.p;
+    if(p)
+        p->addref();
+}
+
+ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
+{
+    Impl* newp = (Impl*)prog.p;
+    if(newp)
+        newp->addref();
+    if(p)
+        p->release();
+    p = newp;
+    return *this;
+}
+
+const String& ProgramSource::source() const
+{
+    CV_Assert(p);
+    return p->codeStr_;
+}
+
+ProgramSource::hash_t ProgramSource::hash() const
+{
+    CV_Assert(p);
+    if (!p->isHashUpdated)
+        p->updateHash();
+    return p->hash_;
+}
+
+
+internal::ProgramEntry::operator ProgramSource&() const
+{
+    if (this->pProgramSource == NULL)
+    {
+        cv::AutoLock lock(cv::getInitializationMutex());
+        if (this->pProgramSource == NULL)
+        {
+            ProgramSource* ps = new ProgramSource(this->module, this->name, this->programCode, this->programHash);
+            const_cast<ProgramEntry*>(this)->pProgramSource = ps;
+        }
+    }
+    return *this->pProgramSource;
+}
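The relocated ProgramSource keeps lazy hashing: when a precomputed hash string is passed (as the generated ProgramEntry tables do), the crc64 over the source runs only if hash() is actually called. Illustrative construction (module/name strings assumed):

    cv::ocl::ProgramSource ps("core", "arithm", codeStr, "0123456789abcdef");
    // with an empty hash string instead, crc64 would run in the constructor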
+
+
+
 /////////////////////////////////////////// Program /////////////////////////////////////////////
 
 struct Program::Impl
 {
     Impl(const ProgramSource& _src,
-         const String& _buildflags, String& errmsg)
+         const String& _buildflags, String& errmsg) :
+         src(_src),
+         buildflags(_buildflags),
+         handle(NULL)
     {
-        CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str());
         refcount = 1;
-        const Context& ctx = Context::getDefault();
-        src = _src;
-        buildflags = _buildflags;
+        compile(Context::getDefault(), errmsg);
+    }
+
+    bool compile(const Context& ctx, String& errmsg)
+    {
+        CV_Assert(handle == NULL);
+        CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Compile: %" PRIx64 " options: %s", src.hash(), buildflags.c_str()).c_str());
         const String& srcstr = src.source();
         const char* srcptr = srcstr.c_str();
         size_t srclen = srcstr.size();
         cl_int retval = 0;
 
         handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
-        if( handle && retval == CL_SUCCESS )
+        CV_OclDbgAssert(handle && retval == CL_SUCCESS);
+        if (handle && retval == CL_SUCCESS)
         {
             int i, n = (int)ctx.ndevices();
             AutoBuffer<void*> deviceListBuf(n+1);
@@ -2414,26 +2656,41 @@ struct Program::Impl
                                     (const cl_device_id*)deviceList,
                                     buildflags.c_str(), 0, 0);
 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
-            if( retval != CL_SUCCESS )
+            if (retval != CL_SUCCESS)
 #endif
             {
+                AutoBuffer<char, 4096> buffer; buffer[0] = 0;
+
                 size_t retsz = 0;
-                cl_int buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
-                                               CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
-                if (buildInfo_retval == CL_SUCCESS && retsz > 1)
+                cl_int log_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
+                                                          CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
+                if (log_retval == CL_SUCCESS && retsz > 1)
                 {
-                    AutoBuffer<char> bufbuf(retsz + 16);
-                    char* buf = bufbuf;
-                    buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
-                                                   CL_PROGRAM_BUILD_LOG, retsz+1, buf, &retsz);
-                    if (buildInfo_retval == CL_SUCCESS)
+                    buffer.resize(retsz + 16);
+                    log_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
+                                                       CL_PROGRAM_BUILD_LOG, retsz+1, (char*)buffer, &retsz);
+                    if (log_retval == CL_SUCCESS)
+                    {
+                        if (retsz < buffer.size())
+                            buffer[retsz] = 0;
+                        else
+                            buffer[buffer.size() - 1] = 0;
+                    }
+                    else
                     {
-                        // TODO It is useful to see kernel name & program file name also
-                        errmsg = String(buf);
-                        printf("OpenCL program build log: %s\n%s\n", buildflags.c_str(), errmsg.c_str());
-                        fflush(stdout);
+                        buffer[0] = 0;
                     }
                 }
+
+                errmsg = String(buffer);
+                printf("OpenCL program build log: %s (%s)\nStatus %d: %s\n%s\n%s\n",
+                        src.getImpl()->name_.c_str(), src.getImpl()->module_.c_str(),
+                        retval, getOpenCLErrorString(retval),
+                        buildflags.c_str(), errmsg.c_str());
+                fflush(stdout);
+
+                // don't remove "retval != CL_SUCCESS" condition here:
+                // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
                 if (retval != CL_SUCCESS && handle)
                 {
                     clReleaseProgram(handle);
@@ -2441,6 +2698,7 @@ struct Program::Impl
                 }
             }
         }
+        return handle != NULL;
     }
 
     Impl(const String& _buf, const String& _buildflags)
@@ -2606,125 +2864,6 @@ String Program::getPrefix(const String& buildflags)
                   dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
 }
 
-///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
-
-struct ProgramSource::Impl
-{
-    Impl(const String& src)
-    {
-        init(cv::String(), cv::String(), src, cv::String());
-    }
-    Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
-    {
-        init(module, name, codeStr, codeHash);
-    }
-    void init(const String& module, const String& name, const String& codeStr, const String& codeHash)
-    {
-        refcount = 1;
-        module_ = module;
-        name_ = name;
-        codeStr_ = codeStr;
-        codeHash_ = codeHash;
-
-        isHashUpdated = false;
-        if (codeHash_.empty())
-        {
-            updateHash();
-            codeHash_ = cv::format("%08llx", hash_);
-        }
-    }
-
-    void updateHash()
-    {
-        hash_ = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
-        isHashUpdated = true;
-    }
-
-    IMPLEMENT_REFCOUNTABLE();
-
-    String module_;
-    String name_;
-    String codeStr_;
-    String codeHash_;
-    // TODO std::vector<ProgramSource> includes_;
-
-    bool isHashUpdated;
-    ProgramSource::hash_t hash_;
-};
-
-
-ProgramSource::ProgramSource()
-{
-    p = 0;
-}
-
-ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
-{
-    p = new Impl(module, name, codeStr, codeHash);
-}
-
-ProgramSource::ProgramSource(const char* prog)
-{
-    p = new Impl(prog);
-}
-
-ProgramSource::ProgramSource(const String& prog)
-{
-    p = new Impl(prog);
-}
-
-ProgramSource::~ProgramSource()
-{
-    if(p)
-        p->release();
-}
-
-ProgramSource::ProgramSource(const ProgramSource& prog)
-{
-    p = prog.p;
-    if(p)
-        p->addref();
-}
-
-ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
-{
-    Impl* newp = (Impl*)prog.p;
-    if(newp)
-        newp->addref();
-    if(p)
-        p->release();
-    p = newp;
-    return *this;
-}
-
-const String& ProgramSource::source() const
-{
-    CV_Assert(p);
-    return p->codeStr_;
-}
-
-ProgramSource::hash_t ProgramSource::hash() const
-{
-    CV_Assert(p);
-    if (!p->isHashUpdated)
-        p->updateHash();
-    return p->hash_;
-}
-
-
-internal::ProgramEntry::operator ProgramSource&() const
-{
-    if (this->pProgramSource == NULL)
-    {
-        cv::AutoLock lock(cv::getInitializationMutex());
-        if (this->pProgramSource == NULL)
-        {
-            ProgramSource* ps = new ProgramSource(this->module, this->name, this->programCode, this->programHash);
-            const_cast<ProgramEntry*>(this)->pProgramSource = ps;
-        }
-    }
-    return *this->pProgramSource;
-}
 
 
 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
@@ -4610,6 +4749,102 @@ const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
     return buf;
 }
 
+const char* getOpenCLErrorString(int errorCode)
+{
+    switch (errorCode)
+    {
+    case   0: return "CL_SUCCESS";
+    case  -1: return "CL_DEVICE_NOT_FOUND";
+    case  -2: return "CL_DEVICE_NOT_AVAILABLE";
+    case  -3: return "CL_COMPILER_NOT_AVAILABLE";
+    case  -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
+    case  -5: return "CL_OUT_OF_RESOURCES";
+    case  -6: return "CL_OUT_OF_HOST_MEMORY";
+    case  -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
+    case  -8: return "CL_MEM_COPY_OVERLAP";
+    case  -9: return "CL_IMAGE_FORMAT_MISMATCH";
+    case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
+    case -11: return "CL_BUILD_PROGRAM_FAILURE";
+    case -12: return "CL_MAP_FAILURE";
+    case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
+    case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
+    case -15: return "CL_COMPILE_PROGRAM_FAILURE";
+    case -16: return "CL_LINKER_NOT_AVAILABLE";
+    case -17: return "CL_LINK_PROGRAM_FAILURE";
+    case -18: return "CL_DEVICE_PARTITION_FAILED";
+    case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
+    case -30: return "CL_INVALID_VALUE";
+    case -31: return "CL_INVALID_DEVICE_TYPE";
+    case -32: return "CL_INVALID_PLATFORM";
+    case -33: return "CL_INVALID_DEVICE";
+    case -34: return "CL_INVALID_CONTEXT";
+    case -35: return "CL_INVALID_QUEUE_PROPERTIES";
+    case -36: return "CL_INVALID_COMMAND_QUEUE";
+    case -37: return "CL_INVALID_HOST_PTR";
+    case -38: return "CL_INVALID_MEM_OBJECT";
+    case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
+    case -40: return "CL_INVALID_IMAGE_SIZE";
+    case -41: return "CL_INVALID_SAMPLER";
+    case -42: return "CL_INVALID_BINARY";
+    case -43: return "CL_INVALID_BUILD_OPTIONS";
+    case -44: return "CL_INVALID_PROGRAM";
+    case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
+    case -46: return "CL_INVALID_KERNEL_NAME";
+    case -47: return "CL_INVALID_KERNEL_DEFINITION";
+    case -48: return "CL_INVALID_KERNEL";
+    case -49: return "CL_INVALID_ARG_INDEX";
+    case -50: return "CL_INVALID_ARG_VALUE";
+    case -51: return "CL_INVALID_ARG_SIZE";
+    case -52: return "CL_INVALID_KERNEL_ARGS";
+    case -53: return "CL_INVALID_WORK_DIMENSION";
+    case -54: return "CL_INVALID_WORK_GROUP_SIZE";
+    case -55: return "CL_INVALID_WORK_ITEM_SIZE";
+    case -56: return "CL_INVALID_GLOBAL_OFFSET";
+    case -57: return "CL_INVALID_EVENT_WAIT_LIST";
+    case -58: return "CL_INVALID_EVENT";
+    case -59: return "CL_INVALID_OPERATION";
+    case -60: return "CL_INVALID_GL_OBJECT";
+    case -61: return "CL_INVALID_BUFFER_SIZE";
+    case -62: return "CL_INVALID_MIP_LEVEL";
+    case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
+    case -64: return "CL_INVALID_PROPERTY";
+    case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
+    case -66: return "CL_INVALID_COMPILER_OPTIONS";
+    case -67: return "CL_INVALID_LINKER_OPTIONS";
+    case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
+    case -69: return "CL_INVALID_PIPE_SIZE";
+    case -70: return "CL_INVALID_DEVICE_QUEUE";
+    case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
+    case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
+    case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
+    case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
+    case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
+    case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
+    case -1024: return "clBLAS: Functionality is not implemented";
+    case -1023: return "clBLAS: Library is not initialized yet";
+    case -1022: return "clBLAS: Matrix A is not a valid memory object";
+    case -1021: return "clBLAS: Matrix B is not a valid memory object";
+    case -1020: return "clBLAS: Matrix C is not a valid memory object";
+    case -1019: return "clBLAS: Vector X is not a valid memory object";
+    case -1018: return "clBLAS: Vector Y is not a valid memory object";
+    case -1017: return "clBLAS: An input dimension (M:N:K) is invalid";
+    case -1016: return "clBLAS: Leading dimension A must not be less than the "
+                       "size of the first dimension";
+    case -1015: return "clBLAS: Leading dimension B must not be less than the "
+                       "size of the second dimension";
+    case -1014: return "clBLAS: Leading dimension C must not be less than the "
+                       "size of the third dimension";
+    case -1013: return "clBLAS: The increment for a vector X must not be 0";
+    case -1012: return "clBLAS: The increment for a vector Y must not be 0";
+    case -1011: return "clBLAS: The memory object for Matrix A is too small";
+    case -1010: return "clBLAS: The memory object for Matrix B is too small";
+    case -1009: return "clBLAS: The memory object for Matrix C is too small";
+    case -1008: return "clBLAS: The memory object for Vector X is too small";
+    case -1007: return "clBLAS: The memory object for Vector Y is too small";
+    default: return "Unknown OpenCL error";
+    }
+}
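Build-log output above now prints the symbolic status via this table instead of a bare integer, e.g.:

    const char* msg = getOpenCLErrorString(-11); // "CL_BUILD_PROGRAM_FAILURE"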
+
 template <typename T>
 static std::string kerToStr(const Mat & k)
 {
@@ -5026,4 +5261,95 @@ bool internal::isCLBuffer(UMat& u)
     return true;
 }
 
+struct Timer::Impl
+{
+    const Queue queue;
+
+    Impl(const Queue& q)
+        : queue(q)
+    {
+    }
+
+    ~Impl(){}
+
+    void start()
+    {
+#ifdef HAVE_OPENCL
+        clFinish((cl_command_queue)queue.ptr());
+        timer.start();
+#endif
+    }
+
+    void stop()
+    {
+#ifdef HAVE_OPENCL
+        clFinish((cl_command_queue)queue.ptr());
+        timer.stop();
+#endif
+    }
+
+    float microSeconds()
+    {
+#ifdef HAVE_OPENCL
+        return (float)timer.getTimeMicro();
+#else
+        return 0;
+#endif
+    }
+
+    float milliSeconds()
+    {
+#ifdef HAVE_OPENCL
+        return (float)timer.getTimeMilli();
+#else
+        return 0;
+#endif
+    }
+
+    float seconds()
+    {
+#ifdef HAVE_OPENCL
+        return (float)timer.getTimeSec();
+#else
+        return 0;
+#endif
+    }
+    TickMeter timer;
+};
+
+Timer::Timer(const Queue& q)
+{
+    p = new Impl(q);
+}
+
+Timer::~Timer()
+{
+    if(p)
+    {
+        delete p;
+        p = 0;
+    }
+}
+
+void Timer::start()
+{
+    if(p)
+        p->start();
+}
+
+void Timer::stop()
+{
+    if(p)
+        p->stop();
+}
+
+float Timer::microSeconds()
+{ return p ? p->microSeconds() : 0; }
+
+float Timer::milliSeconds()
+{ return p ? p->milliSeconds() : 0; }
+
+float Timer::seconds()
+{ return p ? p->seconds() : 0; }
+
 }}
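A hedged sketch of the new cv::ocl::Timer, which brackets the measured region with clFinish on the supplied queue so pending GPU work is counted:

    cv::ocl::Timer t(cv::ocl::Queue::getDefault());
    t.start();
    // ... enqueue OpenCL work through the T-API ...
    t.stop();
    printf("%.2f ms\n", t.milliSeconds());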
index 6bf426b..3cf261b 100644 (file)
@@ -759,15 +759,15 @@ OCL_FUNC_P(cl_mem, clCreateBuffer,
 
 /*
 OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
-
+*/
 OCL_FUNC(cl_int, clGetCommandQueueInfo,
  (cl_command_queue command_queue,
  cl_command_queue_info param_name,
  size_t param_value_size,
  void * param_value,
  size_t * param_value_size_ret),
  (command_queue, param_name, param_value_size, param_value, param_value_size_ret))
-
+/*
 OCL_FUNC_P(cl_mem, clCreateSubBuffer,
     (cl_mem buffer,
     cl_mem_flags flags,
@@ -1202,6 +1202,19 @@ OCL_FUNC(cl_int, clSetEventCallback,
 
 OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
 
+OCL_FUNC(cl_int, clWaitForEvents,
+    (cl_uint num_events, const cl_event *event_list),
+    (num_events, event_list))
+
+
+OCL_FUNC(cl_int, clGetEventProfilingInfo, (
+    cl_event event,
+    cl_profiling_info param_name,
+    size_t param_value_size,
+    void *param_value,
+    size_t *param_value_size_ret),
+    (event, param_name, param_value_size, param_value, param_value_size_ret))
+
 }
 
 #endif
index d38e20d..9fa87f1 100644 (file)
@@ -85,7 +85,7 @@
 #include "opencv2/core/hal/intrin.hpp"
 #include "opencv2/core/sse_utils.hpp"
 #include "opencv2/core/neon_utils.hpp"
-
+#include "opencv2/core/vsx_utils.hpp"
 #include "arithm_core.hpp"
 #include "hal_replacement.hpp"
 
@@ -297,12 +297,6 @@ TLSData<CoreTLSData>& getCoreTlsData();
 #define CL_RUNTIME_EXPORT
 #endif
 
-namespace utils {
-bool getConfigurationParameterBool(const char* name, bool defaultValue);
-size_t getConfigurationParameterSizeT(const char* name, size_t defaultValue);
-cv::String getConfigurationParameterString(const char* name, const char* defaultValue);
-}
-
 extern bool __termination; // skip some cleanups, because process is terminating
                            // (for example, if ExitProcess() was already called)
 
index 7937d08..4532a9f 100644 (file)
@@ -1911,7 +1911,7 @@ static float64_t f64_sqrt( float64_t a )
     sigZ = ((uint_fast64_t) sig32Z<<32 | 1<<5) + ((uint_fast64_t) q<<3);
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
-    if ( (sigZ & 0x1FF) < 1<<5 ) {
+    if ( (sigZ & 0x1FF) < 0x22 ) {
         sigZ &= ~(uint_fast64_t) 0x3F;
         shiftedSigZ = sigZ>>6;
         rem = (sigA<<52) - shiftedSigZ * shiftedSigZ;
index 60d3be9..d3232e5 100644 (file)
@@ -4365,7 +4365,7 @@ float normL2Sqr_(const float* a, const float* b, int n)
     for( ; j <= n - 8; j += 8 )
     {
         __m256 t0 = _mm256_sub_ps(_mm256_loadu_ps(a + j), _mm256_loadu_ps(b + j));
-#ifdef CV_FMA3
+#if CV_FMA3
         d0 = _mm256_fmadd_ps(t0, t0, d0);
 #else
         d0 = _mm256_add_ps(d0, _mm256_mul_ps(t0, t0));
index 2ec150e..ca811a0 100644 (file)
@@ -44,6 +44,7 @@
 #include "precomp.hpp"
 #include <iostream>
 
+#include <opencv2/core/utils/configuration.private.hpp>
 #include <opencv2/core/utils/trace.private.hpp>
 
 namespace cv {
@@ -79,6 +80,18 @@ Mutex* __initialization_mutex_initializer = &getInitializationMutex();
 #  include <cpu-features.h>
 #endif
 
+#ifndef __VSX__
+# if defined __PPC64__ && defined __linux__
+#   include "sys/auxv.h"
+#   ifndef AT_HWCAP2
+#     define AT_HWCAP2 26
+#   endif
+#   ifndef PPC_FEATURE2_ARCH_2_07
+#     define PPC_FEATURE2_ARCH_2_07 0x80000000
+#   endif
+# endif
+#endif
+
 #if defined _WIN32 || defined WINCE
 #ifndef _WIN32_WINNT           // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
   #define _WIN32_WINNT 0x0400  // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
@@ -294,6 +307,8 @@ struct HWFeatures
         g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL";
 
         g_hwFeatureNames[CPU_NEON] = "NEON";
+
+        g_hwFeatureNames[CPU_VSX] = "VSX";
     }
 
     void initialize(void)
@@ -503,6 +518,16 @@ struct HWFeatures
     #endif
     #endif
 
+    #ifdef __VSX__
+        have[CV_CPU_VSX] = true;
+    #elif (defined __PPC64__ && defined __linux__)
+        uint64 hwcaps = getauxval(AT_HWCAP);
+        uint64 hwcap2 = getauxval(AT_HWCAP2);
+        have[CV_CPU_VSX] = (hwcaps & PPC_FEATURE_PPC_LE && hwcaps & PPC_FEATURE_HAS_VSX && hwcap2 & PPC_FEATURE2_ARCH_2_07);
+    #else
+        have[CV_CPU_VSX] = false;
+    #endif
+
         int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
         if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])))
         {
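With the feature bit wired into HWFeatures, user code can query it through the usual capability API. A sketch assuming an OpenCV build that includes this patch:

    #include <opencv2/core.hpp>
    #include <iostream>

    int main()
    {
        // true when compiled with VSX enabled, or when getauxval() reports
        // a little-endian POWER8 (ISA 2.07) CPU at runtime
        if (cv::checkHardwareSupport(CV_CPU_VSX))
            std::cout << "VSX is available" << std::endl;
        return 0;
    }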
index d915364..2305106 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <opencv2/core/utils/trace.hpp>
 #include <opencv2/core/utils/trace.private.hpp>
+#include <opencv2/core/utils/configuration.private.hpp>
 
 #include <cstdarg> // va_start
 
index 1be5fc6..84cebdb 100644 (file)
@@ -267,19 +267,22 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
     UMat hdr;
     if(!data)
         return hdr;
-    Size wholeSize;
-    Point ofs;
-    locateROI(wholeSize, ofs);
-    Size sz(cols, rows);
-    if (ofs.x != 0 || ofs.y != 0)
+    if (data != datastart)
     {
-        Mat src = *this;
-        int dtop = ofs.y;
-        int dbottom = wholeSize.height - src.rows - ofs.y;
-        int dleft = ofs.x;
-        int dright = wholeSize.width - src.cols - ofs.x;
-        src.adjustROI(dtop, dbottom, dleft, dright);
-        return src.getUMat(accessFlags, usageFlags)(cv::Rect(ofs.x, ofs.y, sz.width, sz.height));
+        Size wholeSize;
+        Point ofs;
+        locateROI(wholeSize, ofs);
+        Size sz(cols, rows);
+        if (ofs.x != 0 || ofs.y != 0)
+        {
+            Mat src = *this;
+            int dtop = ofs.y;
+            int dbottom = wholeSize.height - src.rows - ofs.y;
+            int dleft = ofs.x;
+            int dright = wholeSize.width - src.cols - ofs.x;
+            src.adjustROI(dtop, dbottom, dleft, dright);
+            return src.getUMat(accessFlags, usageFlags)(cv::Rect(ofs.x, ofs.y, sz.width, sz.height));
+        }
     }
     CV_Assert(data == datastart);
 
index bd51c74..44f4a72 100644 (file)
@@ -412,3 +412,124 @@ TEST(Core_Eigen, scalar_32) {Core_EigenTest_Scalar_32 test; test.safe_run(); }
 TEST(Core_Eigen, scalar_64) {Core_EigenTest_Scalar_64 test; test.safe_run(); }
 TEST(Core_Eigen, vector_32) { Core_EigenTest_32 test; test.safe_run(); }
 TEST(Core_Eigen, vector_64) { Core_EigenTest_64 test; test.safe_run(); }
+
+template<typename T>
+static void testEigen(const Mat_<T>& src, const Mat_<T>& expected_eigenvalues, bool runSymmetric = false)
+{
+    SCOPED_TRACE(runSymmetric ? "cv::eigen" : "cv::eigenNonSymmetric");
+
+    int type = traits::Type<T>::value;
+    const T eps = 1e-6f;
+
+    Mat eigenvalues, eigenvectors, eigenvalues0;
+
+    if (runSymmetric)
+    {
+        cv::eigen(src, eigenvalues0, noArray());
+        cv::eigen(src, eigenvalues, eigenvectors);
+    }
+    else
+    {
+        cv::eigenNonSymmetric(src, eigenvalues0, noArray());
+        cv::eigenNonSymmetric(src, eigenvalues, eigenvectors);
+    }
+#if 0
+    std::cout << "src = " << src << std::endl;
+    std::cout << "eigenvalues.t() = " << eigenvalues.t() << std::endl;
+    std::cout << "eigenvectors = " << eigenvectors << std::endl;
+#endif
+    ASSERT_EQ(type, eigenvalues0.type());
+    ASSERT_EQ(type, eigenvalues.type());
+    ASSERT_EQ(type, eigenvectors.type());
+
+    ASSERT_EQ(src.rows, eigenvalues.rows);
+    ASSERT_EQ(eigenvalues.rows, eigenvectors.rows);
+    ASSERT_EQ(src.rows, eigenvectors.cols);
+
+    EXPECT_LT(cvtest::norm(eigenvalues, eigenvalues0, NORM_INF), eps);
+
+    // check definition: src*eigenvectors.row(i).t() = eigenvalues.at<srcType>(i)*eigenvectors.row(i).t()
+    for (int i = 0; i < src.rows; i++)
+    {
+        EXPECT_NEAR(eigenvalues.at<T>(i), expected_eigenvalues(i), eps) << "i=" << i;
+        Mat lhs = src*eigenvectors.row(i).t();
+        Mat rhs = eigenvalues.at<T>(i)*eigenvectors.row(i).t();
+        EXPECT_LT(cvtest::norm(lhs, rhs, NORM_INF), eps)
+                << "i=" << i << " eigenvalue=" << eigenvalues.at<T>(i) << std::endl
+                << "lhs=" << lhs.t() << std::endl
+                << "rhs=" << rhs.t();
+    }
+}
+
+template<typename T>
+static void testEigenSymmetric3x3()
+{
+    /*const*/ T values_[] = {
+            2, -1, 0,
+            -1, 2, -1,
+            0, -1, 2
+    };
+    Mat_<T> src(3, 3, values_);
+
+    /*const*/ T expected_eigenvalues_[] = { 3.414213562373095f, 2, 0.585786437626905f };
+    Mat_<T> expected_eigenvalues(3, 1, expected_eigenvalues_);
+
+    testEigen(src, expected_eigenvalues);
+    testEigen(src, expected_eigenvalues, true);
+}
+TEST(Core_EigenSymmetric, float3x3) { testEigenSymmetric3x3<float>(); }
+TEST(Core_EigenSymmetric, double3x3) { testEigenSymmetric3x3<double>(); }
+
+template<typename T>
+static void testEigenSymmetric5x5()
+{
+    /*const*/ T values_[5*5] = {
+            5, -1, 0, 2, 1,
+            -1, 4, -1, 0, 0,
+            0, -1, 3, 1, -1,
+            2, 0, 1, 4, 0,
+            1, 0, -1, 0, 1
+    };
+    Mat_<T> src(5, 5, values_);
+
+    /*const*/ T expected_eigenvalues_[] = { 7.028919644935684f, 4.406130784616501f, 3.73626552682258f, 1.438067799899037f, 0.390616243726198f };
+    Mat_<T> expected_eigenvalues(5, 1, expected_eigenvalues_);
+
+    testEigen(src, expected_eigenvalues);
+    testEigen(src, expected_eigenvalues, true);
+}
+TEST(Core_EigenSymmetric, float5x5) { testEigenSymmetric5x5<float>(); }
+TEST(Core_EigenSymmetric, double5x5) { testEigenSymmetric5x5<double>(); }
+
+
+template<typename T>
+static void testEigen2x2()
+{
+    /*const*/ T values_[] = { 4, 1, 6, 3 };
+    Mat_<T> src(2, 2, values_);
+
+    /*const*/ T expected_eigenvalues_[] = { 6, 1 };
+    Mat_<T> expected_eigenvalues(2, 1, expected_eigenvalues_);
+
+    testEigen(src, expected_eigenvalues);
+}
+TEST(Core_EigenNonSymmetric, float2x2) { testEigen2x2<float>(); }
+TEST(Core_EigenNonSymmetric, double2x2) { testEigen2x2<double>(); }
+
+template<typename T>
+static void testEigen3x3()
+{
+    /*const*/ T values_[3*3] = {
+            3,1,0,
+            0,3,1,
+            0,0,3
+    };
+    Mat_<T> src(3, 3, values_);
+
+    /*const*/ T expected_eigenvalues_[] = { 3, 3, 3 };
+    Mat_<T> expected_eigenvalues(3, 1, expected_eigenvalues_);
+
+    testEigen(src, expected_eigenvalues);
+}
+TEST(Core_EigenNonSymmetric, float3x3) { testEigen3x3<float>(); }
+TEST(Core_EigenNonSymmetric, double3x3) { testEigen3x3<double>(); }
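The definition check in testEigen is simply A * v_i^T == lambda_i * v_i^T applied to each returned row. A standalone usage sketch with a hypothetical 2x2 symmetric input:

    cv::Mat A = (cv::Mat_<double>(2, 2) << 2, -1,
                                          -1,  2);
    cv::Mat evals, evecs;
    cv::eigen(A, evals, evecs);                   // evals = [3; 1], descending order
    // each eigenvector is a row; verify A * v^T == lambda * v^T
    cv::Mat lhs = A * evecs.row(0).t();
    cv::Mat rhs = evals.at<double>(0) * evecs.row(0).t();
    CV_Assert(cv::norm(lhs, rhs, cv::NORM_INF) < 1e-6);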
index 68a07a5..64a4574 100644 (file)
@@ -47,7 +47,9 @@
 #define OPENCV_CUDEV_UTIL_SATURATE_CAST_HPP
 
 #include "../common.hpp"
-#include "opencv2/core/private.cuda.hpp"
+#if __CUDACC_VER_MAJOR__ >= 9
+#include <cuda_fp16.h>
+#endif
 
 namespace cv { namespace cudev {
 
@@ -275,7 +277,7 @@ template <typename T, typename D> __device__ __forceinline__ D cast_fp16(T v);
 
 template <> __device__ __forceinline__ float cast_fp16<short, float>(short v)
 {
-#if __CUDACC_VER_MAJOR__  >= 9
+#if __CUDACC_VER_MAJOR__ >= 9
   return float(*(__half*)&v);
 #else
     return __half2float(v);
@@ -284,7 +286,7 @@ template <> __device__ __forceinline__ float cast_fp16<short, float>(short v)
 
 template <> __device__ __forceinline__ short cast_fp16<float, short>(float v)
 {
-#if __CUDACC_VER_MAJOR__  >= 9
+#if __CUDACC_VER_MAJOR__ >= 9
   __half h(v);
   return *(short*)&h; // take the address of the converted half 'h', not of the source float 'v'
 #else
index d0bc332..866f544 100644 (file)
@@ -21,6 +21,8 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninit
 )
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4701 /wd4100)
 
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/ocl4dnn/include ${OPENCL_INCLUDE_DIRS})
+
 if(MSVC)
   add_definitions( -D_CRT_SECURE_NO_WARNINGS=1 )
   ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146
index 4c08fb6..9a4734a 100644 (file)
@@ -84,7 +84,9 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         /** Creates instance of LSTM layer */
         static Ptr<LSTMLayer> create(const LayerParams& params);
 
-        /** Set trained weights for LSTM layer.
+        /** @deprecated Use LayerParams::blobs instead.
+        @brief Set trained weights for LSTM layer.
+
         LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
 
         Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
@@ -114,7 +116,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         @param Wx is a matrix defining how the current input is transformed to the internal gates (i.e. according to the above-mentioned notation, @f$ W_x @f$)
         @param b  is a bias vector (i.e. according to the above-mentioned notation, @f$ b @f$)
         */
-        virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
+        CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
 
         /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
           * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
@@ -122,7 +124,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
           */
         virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
 
-        /** @brief Specifies either interpet first dimension of input blob as timestamp dimenion either as sample.
+        /** @deprecated Use flag `use_timestamp_dim` in LayerParams.
+         * @brief Specifies whether to interpret the first dimension of the input blob as the timestamp dimension or as a sample.
           *
           * If flag is set to true then the shape of the input blob will be interpreted as [`T`, `N`, `[data dims]`], where `T` specifies the number of timestamps and `N` is the number of independent streams.
           * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
@@ -130,12 +133,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
           * If flag is set to false then the shape of the input blob will be interpreted as [`N`, `[data dims]`].
           * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
           */
-        virtual void setUseTimstampsDim(bool use = true) = 0;
+        CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
 
-        /** @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
+        /** @deprecated Use flag `produce_cell_output` in LayerParams.
+         * @brief If this flag is set to true then the layer will produce @f$ c_t @f$ as its second output.
          * @details Shape of the second output is the same as first output.
          */
-        virtual void setProduceCellOutput(bool produce = false) = 0;
+        CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
 
         /* In the common case it uses a single input with @f$x_t@f$ values to compute the output(s) @f$h_t@f$ (and @f$c_t@f$).
          * @param input should contain packed values @f$x_t@f$
@@ -199,6 +203,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     public:
         Size kernel, stride, pad, dilation, adjustPad;
         String padMode;
+        int numOutput;
     };
 
     class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
@@ -322,11 +327,41 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         static Ptr<SplitLayer> create(const LayerParams &params);
     };
 
+    /**
+     * Slice layer has several modes:
+     * 1. Caffe mode
+     * @param[in] axis Axis of split operation
+     * @param[in] slice_point Array of split points
+     *
+     * The number of output blobs equals the number of split points plus one. The
+     * first blob is a slice of the input from 0 to @p slice_point[0] - 1 along @p axis,
+     * the second output blob is a slice of the input from @p slice_point[0] to
+     * @p slice_point[1] - 1 along @p axis, and the last output blob is a slice of
+     * the input from @p slice_point[-1] up to the end of @p axis.
+     *
+     * 2. TensorFlow mode
+     * @param begin Vector of start indices
+     * @param size Vector of sizes
+     *
+     * A more convenient numpy-like slice. The one and only output blob
+     * is a slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`
+     *
+     * 3. Torch mode
+     * @param axis Axis of split operation
+     *
+     * Splits the input blob into equal parts along @p axis.
+     */
     class CV_EXPORTS SliceLayer : public Layer
     {
     public:
+        /**
+         * @brief Vector of slice ranges.
+         *
+         * The first dimension equals number of output blobs.
+         * Each inner vector holds slice ranges over the first dimensions of the input blob.
+         */
+        std::vector<std::vector<Range> > sliceRanges;
         int axis;
-        std::vector<int> sliceIndices;
 
         static Ptr<SliceLayer> create(const LayerParams &params);
     };
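A sketch of the TensorFlow mode expressed through the new member, with hypothetical ranges (importers normally fill sliceRanges from the `begin`/`size` parameters):

    cv::dnn::LayerParams lp;
    lp.type = "Slice";
    lp.name = "slice_example";
    cv::Ptr<cv::dnn::SliceLayer> layer = cv::dnn::SliceLayer::create(lp);
    layer->sliceRanges.resize(1);                       // one output blob
    layer->sliceRanges[0].push_back(cv::Range::all());  // batch: keep
    layer->sliceRanges[0].push_back(cv::Range::all());  // channels: keep
    layer->sliceRanges[0].push_back(cv::Range(4, 12));  // rows: begin=4, size=8
    layer->sliceRanges[0].push_back(cv::Range(2, 18));  // cols: begin=2, size=16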
@@ -337,6 +372,25 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         static Ptr<PermuteLayer> create(const LayerParams& params);
     };
 
+    /**
+     * @brief Adds extra values for specific axes.
+     * @param paddings Vector of paddings in format
+     *                 @code
+     *                 [ pad_before, pad_after,  // [0]th dimension
+     *                   pad_before, pad_after,  // [1]st dimension
+     *                   ...
+     *                   pad_before, pad_after ] // [n]th dimension
+     *                 @endcode
+     *                 that represents the number of padded values at every
+     *                 dimension, starting from the first one. The remaining
+     *                 dimensions won't be padded.
+     * @param value Value used for padding. Defaults to zero.
+     * @param input_dims Torch-specific parameter. If @p input_dims is not equal
+     *                   to the actual input dimensionality, the `[0]th` dimension
+     *                   is treated as a batch dimension and @p paddings are shifted
+     *                   by one dimension. Defaults to `-1`, which means @p paddings
+     *                   is applied exactly as given.
+     */
     class CV_EXPORTS PaddingLayer : public Layer
     {
     public:
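A sketch of constructing the layer from the documented parameters, assuming the importer-facing keys match the names above (values hypothetical):

    int pads[] = { 0, 0,    // [0]th (batch): no padding
                   0, 0,    // [1]st (channels): no padding
                   1, 1,    // height: pad_before, pad_after
                   1, 1 };  // width:  pad_before, pad_after
    cv::dnn::LayerParams lp;
    lp.type = "Padding";
    lp.set("paddings", cv::dnn::DictValue::arrayInt(pads, 8));
    lp.set("value", 0.);    // pad with zeros
    cv::Ptr<cv::dnn::Layer> pad = cv::dnn::PaddingLayer::create(lp);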
@@ -368,7 +422,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
     {
     public:
-        static Ptr<ChannelsPReLULayer> create(const LayerParams& params);
+        static Ptr<Layer> create(const LayerParams& params);
     };
 
     class CV_EXPORTS ELULayer : public ActivationLayer
@@ -497,6 +551,17 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
     };
 
+    /**
+     * @brief Resizes an input 4-dimensional blob using a nearest-neighbor strategy.
+     *
+     * The layer is used to support TensorFlow's resize_nearest_neighbor op.
+     */
+    class CV_EXPORTS ResizeNearestNeighborLayer : public Layer
+    {
+    public:
+        static Ptr<ResizeNearestNeighborLayer> create(const LayerParams& params);
+    };
+
 //! @}
 //! @}
 CV__DNN_EXPERIMENTAL_NS_END
index 6c19a1d..59970e6 100644 (file)
@@ -297,6 +297,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 
         CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
         CV_PROP String type; //!< Type name which was used for creating layer by layer factory.
+        CV_PROP int preferableTarget; //!< preferred target for layer forwarding
 
         Layer();
         explicit Layer(const LayerParams &params);      //!< Initializes only #name, #type and #blobs fields.
@@ -636,7 +637,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     /** @brief Reads a network model stored in Tensorflow model file.
      * @details This is a shortcut consisting of createTensorflowImporter and Net::populateNet calls.
       */
-    CV_EXPORTS_W Net readNetFromTensorflow(const String &model);
+    CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String());
 
     /** @brief Reads a network model stored in Torch model file.
      * @details This is a shortcut consisting of createTorchImporter and Net::populateNet calls.
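A sketch combining the new optional config argument with the OpenCL target plumbing added in this patch (file names and the input image are hypothetical):

    cv::Mat img = cv::imread("input.jpg");        // requires opencv2/imgcodecs.hpp
    cv::dnn::Net net = cv::dnn::readNetFromTensorflow("graph.pb", "graph.pbtxt");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL); // propagated to Layer::preferableTarget
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(224, 224), cv::Scalar(), false);
    net.setInput(blob);
    cv::Mat prob = net.forward();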
diff --git a/modules/dnn/perf/opencl/perf_convolution.cpp b/modules/dnn/perf/opencl/perf_convolution.cpp
new file mode 100644 (file)
index 0000000..3620579
--- /dev/null
@@ -0,0 +1,118 @@
+#include "../perf_precomp.hpp"
+#include "opencv2/ts/ocl_perf.hpp"
+#include <opencv2/dnn/shape_utils.hpp>
+
+#ifdef HAVE_OPENCL
+
+namespace cvtest
+{
+namespace ocl
+{
+
+using std::tr1::tuple;
+using std::tr1::get;
+using std::tr1::make_tuple;
+using std::make_pair;
+using namespace perf;
+using namespace testing;
+using namespace cv;
+using namespace cv::dnn;
+
+enum {STRIDE_OFF = 1, STRIDE_ON = 2};
+CV_ENUM(StrideSize, STRIDE_OFF, STRIDE_ON);
+
+enum {GROUP_OFF = 1, GROUP_2 = 2};
+CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
+
+//Squared Size
+#define SSZ(n) cv::Size(n, n)
+
+typedef std::pair<MatShape, int> InpShapeNumOut;
+typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
+typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
+
+static inline MatShape blobShape(int count, int nplanes, int height, int width)
+{
+    int data[] = {count, nplanes, height, width};
+    return MatShape(data, data+4);
+}
+
+OCL_PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
+    Values(Size(1, 1), Size(3, 3), Size(5, 5), Size(11, 11)),
+    Values(make_pair(blobShape(1,   4, 224, 224),  64),
+           make_pair(blobShape(1,  64, 112, 122), 128),
+           make_pair(blobShape(1, 256,  28,  28), 512)),
+    GroupSize::all(),
+    StrideSize::all())
+)
+{
+    RNG rng(0);
+
+    ConvParam params = GetParam();
+    int ksz     = get<0>(params).width;
+    MatShape inpShape = get<1>(params).first;
+    int outCn   = get<1>(params).second;
+    int groups  = get<2>(params);
+    int stride  = (ksz >= 11) ? 4 : (int)get<3>(params);
+
+    int inpCn = inpShape[1];
+    int wgtSize[] = { outCn, inpCn/groups, ksz, ksz };
+    int biasSize[] = { outCn, 1, 1, 1 };
+    const int wtype = CV_32F;
+    Mat wgtBlob(4, wgtSize, wtype), biasBlob(4, biasSize, wtype);
+    Mat inpBlob(4, &inpShape[0], wtype);
+    rng.fill(biasBlob, RNG::UNIFORM, -1, +1);
+    rng.fill(wgtBlob, RNG::UNIFORM, -1, +1);
+    rng.fill(inpBlob, RNG::UNIFORM, -1, +1);
+
+    LayerParams lp;
+    lp.set("num_output", outCn);
+    lp.set("group", groups);
+    lp.set("stride", stride);
+    lp.set("kernel_size", ksz);
+    lp.blobs.reserve(2);
+    lp.blobs.push_back(wgtBlob);
+    lp.blobs.push_back(biasBlob);
+
+    std::vector<Mat*> inpBlobs(1, &inpBlob);
+    std::vector<Mat> outBlobs, internalBlobs;
+
+    cv::setNumThreads(cv::getNumberOfCPUs());
+
+    Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);
+    std::vector<MatShape> inputShapes(1, shape(inpBlob)), outShapes, internals;
+    layer->getMemoryShapes(inputShapes, 0, outShapes, internals);
+    for (size_t i = 0; i < outShapes.size(); i++)
+    {
+        outBlobs.push_back(Mat(outShapes[i], CV_32F));
+    }
+    for (size_t i = 0; i < internals.size(); i++)
+    {
+        internalBlobs.push_back(Mat());
+        if (total(internals[i]))
+            internalBlobs.back().create(internals[i], CV_32F);
+    }
+
+    layer->finalize(inpBlobs, outBlobs);
+    layer->preferableTarget = DNN_TARGET_OPENCL;
+
+    Mat inpBlob2D = inpBlob.reshape(1, outCn);
+    Mat wgtBlob2D = wgtBlob.reshape(1, outCn*(inpCn/groups));
+    Mat outBlob2D = outBlobs[0].reshape(1, outBlobs[0].size[0]);
+    declare.in(inpBlob2D, wgtBlob2D, WARMUP_RNG).out(outBlob2D).tbb_threads(cv::getNumThreads());
+
+    // warmup
+    layer->forward(inpBlobs, outBlobs, internalBlobs);
+
+    TEST_CYCLE()
+    {
+        layer->forward(inpBlobs, outBlobs, internalBlobs);
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+}
+}
+
+#endif
index 502c5ef..7429885 100644 (file)
@@ -1,27 +1,15 @@
 #include "perf_precomp.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 
-namespace cvtest
+namespace
 {
 
-using std::tr1::tuple;
-using std::tr1::get;
-using std::tr1::make_tuple;
-using std::make_pair;
-using namespace perf;
-using namespace testing;
-using namespace cv;
-using namespace cv::dnn;
-
 enum {STRIDE_OFF = 1, STRIDE_ON = 2};
 CV_ENUM(StrideSize, STRIDE_OFF, STRIDE_ON);
 
 enum {GROUP_OFF = 1, GROUP_2 = 2};
 CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
 
-//Squared Size
-#define SSZ(n) cv::Size(n, n)
-
 typedef std::pair<MatShape, int> InpShapeNumOut;
 typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
 typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
@@ -77,11 +65,11 @@ PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
     Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);
     std::vector<MatShape> inputShapes(1, shape(inpBlob)), outShapes, internals;
     layer->getMemoryShapes(inputShapes, 0, outShapes, internals);
-    for (int i = 0; i < outShapes.size(); i++)
+    for (size_t i = 0; i < outShapes.size(); i++)
     {
         outBlobs.push_back(Mat(outShapes[i], CV_32F));
     }
-    for (int i = 0; i < internals.size(); i++)
+    for (size_t i = 0; i < internals.size(); i++)
     {
         internalBlobs.push_back(Mat());
         if (total(internals[i]))
@@ -95,12 +83,13 @@ PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
     Mat outBlob2D = outBlobs[0].reshape(1, outBlobs[0].size[0]);
     declare.in(inpBlob2D, wgtBlob2D, WARMUP_RNG).out(outBlob2D).tbb_threads(cv::getNumThreads());
 
-    TEST_CYCLE_N(10)
-    {
+    layer->forward(inpBlobs, outBlobs, internalBlobs); /// warmup
+
+    PERF_SAMPLE_BEGIN()
         layer->forward(inpBlobs, outBlobs, internalBlobs);
-    }
+    PERF_SAMPLE_END()
 
     SANITY_CHECK_NOTHING();
 }
 
-}
+} // namespace
diff --git a/modules/dnn/perf/perf_halide_net.cpp b/modules/dnn/perf/perf_halide_net.cpp
deleted file mode 100644 (file)
index 84e6305..0000000
+++ /dev/null
@@ -1,174 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-//
-// Copyright (C) 2017, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-
-#include "perf_precomp.hpp"
-
-namespace cvtest
-{
-
-#ifdef HAVE_HALIDE
-using namespace cv;
-using namespace dnn;
-
-static void loadNet(std::string weights, std::string proto, std::string scheduler,
-                    int inWidth, int inHeight, const std::string& outputLayer,
-                    const std::string& framework, int targetId, Net* net)
-{
-    Mat input(inHeight, inWidth, CV_32FC3);
-    randu(input, 0.0f, 1.0f);
-
-    weights = findDataFile(weights, false);
-    if (!proto.empty())
-        proto = findDataFile(proto, false);
-    if (!scheduler.empty())
-        scheduler = findDataFile(scheduler, false);
-    if (framework == "caffe")
-    {
-        *net = cv::dnn::readNetFromCaffe(proto, weights);
-    }
-    else if (framework == "torch")
-    {
-        *net = cv::dnn::readNetFromTorch(weights);
-    }
-    else if (framework == "tensorflow")
-    {
-        *net = cv::dnn::readNetFromTensorflow(weights);
-    }
-    else
-        CV_Error(Error::StsNotImplemented, "Unknown framework " + framework);
-
-    net->setInput(blobFromImage(input, 1.0, Size(), Scalar(), false));
-    net->setPreferableBackend(DNN_BACKEND_HALIDE);
-    net->setPreferableTarget(targetId);
-    net->setHalideScheduler(scheduler);
-    net->forward(outputLayer);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// CPU target
-////////////////////////////////////////////////////////////////////////////////
-PERF_TEST(GoogLeNet, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
-            "", 224, 224, "prob", "caffe", DNN_TARGET_CPU, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-
-PERF_TEST(AlexNet, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
-            "dnn/halide_scheduler_alexnet.yml", 227, 227, "prob", "caffe",
-            DNN_TARGET_CPU, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-
-PERF_TEST(ResNet50, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
-            "dnn/halide_scheduler_resnet_50.yml", 224, 224, "prob", "caffe",
-            DNN_TARGET_CPU, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-
-PERF_TEST(SqueezeNet_v1_1, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt",
-            "dnn/halide_scheduler_squeezenet_v1_1.yml", 227, 227, "prob",
-            "caffe", DNN_TARGET_CPU, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-
-PERF_TEST(Inception_5h, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/tensorflow_inception_graph.pb", "",
-            "dnn/halide_scheduler_inception_5h.yml",
-            224, 224, "softmax2", "tensorflow", DNN_TARGET_CPU, &net);
-    TEST_CYCLE() net.forward("softmax2");
-    SANITY_CHECK_NOTHING();
-}
-
-PERF_TEST(ENet, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/Enet-model-best.net", "", "dnn/halide_scheduler_enet.yml",
-            512, 256, "l367_Deconvolution", "torch", DNN_TARGET_CPU, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-////////////////////////////////////////////////////////////////////////////////
-// OpenCL target
-////////////////////////////////////////////////////////////////////////////////
-PERF_TEST(GoogLeNet_opencl, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
-            "", 227, 227, "prob", "caffe", DNN_TARGET_OPENCL, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-
-PERF_TEST(AlexNet_opencl, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
-            "dnn/halide_scheduler_opencl_alexnet.yml", 227, 227, "prob", "caffe",
-            DNN_TARGET_OPENCL, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-
-PERF_TEST(ResNet50_opencl, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
-            "dnn/halide_scheduler_opencl_resnet_50.yml", 224, 224, "prob", "caffe",
-            DNN_TARGET_OPENCL, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-
-
-PERF_TEST(SqueezeNet_v1_1_opencl, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt",
-            "dnn/halide_scheduler_opencl_squeezenet_v1_1.yml", 227, 227, "prob",
-            "caffe", DNN_TARGET_OPENCL, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-
-PERF_TEST(Inception_5h_opencl, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/tensorflow_inception_graph.pb", "",
-            "dnn/halide_scheduler_opencl_inception_5h.yml",
-            224, 224, "softmax2", "tensorflow", DNN_TARGET_OPENCL, &net);
-    TEST_CYCLE() net.forward("softmax2");
-    SANITY_CHECK_NOTHING();
-}
-
-PERF_TEST(ENet_opencl, HalidePerfTest)
-{
-    Net net;
-    loadNet("dnn/Enet-model-best.net", "", "dnn/halide_scheduler_opencl_enet.yml",
-            512, 256, "l367_Deconvolution", "torch", DNN_TARGET_OPENCL, &net);
-    TEST_CYCLE() net.forward();
-    SANITY_CHECK_NOTHING();
-}
-#endif  // HAVE_HALIDE
-
-}  // namespace cvtest
diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp
new file mode 100644 (file)
index 0000000..990470f
--- /dev/null
@@ -0,0 +1,149 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "perf_precomp.hpp"
+#include "opencv2/core/ocl.hpp"
+
+#include "opencv2/dnn/shape_utils.hpp"
+
+namespace
+{
+
+#ifdef HAVE_HALIDE
+#define TEST_DNN_BACKEND DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE
+#else
+#define TEST_DNN_BACKEND DNN_BACKEND_DEFAULT
+#endif
+#define TEST_DNN_TARGET DNN_TARGET_CPU, DNN_TARGET_OPENCL
+
+CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE)
+CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL)
+
+class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple<DNNBackend, DNNTarget> >
+{
+public:
+    dnn::Backend backend;
+    dnn::Target target;
+
+    dnn::Net net;
+
+    void processNet(std::string weights, std::string proto, std::string halide_scheduler,
+                        int inWidth, int inHeight, const std::string& outputLayer,
+                        const std::string& framework)
+    {
+        backend = (dnn::Backend)(int)get<0>(GetParam());
+        target = (dnn::Target)(int)get<1>(GetParam());
+
+        if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL)
+        {
+#if defined(HAVE_OPENCL)
+            if (!cv::ocl::useOpenCL())
+#endif
+            {
+                throw ::SkipTestException("OpenCL is not available/disabled in OpenCV");
+            }
+        }
+
+        Mat input(inHeight, inWidth, CV_32FC3);
+        randu(input, 0.0f, 1.0f);
+
+        weights = findDataFile(weights, false);
+        if (!proto.empty())
+            proto = findDataFile(proto, false);
+        if (!halide_scheduler.empty() && backend == DNN_BACKEND_HALIDE)
+            halide_scheduler = findDataFile(std::string("dnn/halide_scheduler_") + (target == DNN_TARGET_OPENCL ? "opencl_" : "") + halide_scheduler, true);
+        if (framework == "caffe")
+        {
+            net = cv::dnn::readNetFromCaffe(proto, weights);
+        }
+        else if (framework == "torch")
+        {
+            net = cv::dnn::readNetFromTorch(weights);
+        }
+        else if (framework == "tensorflow")
+        {
+            net = cv::dnn::readNetFromTensorflow(weights);
+        }
+        else
+            CV_Error(Error::StsNotImplemented, "Unknown framework " + framework);
+
+        net.setInput(blobFromImage(input, 1.0, Size(), Scalar(), false));
+        net.setPreferableBackend(backend);
+        net.setPreferableTarget(target);
+        if (backend == DNN_BACKEND_HALIDE)
+        {
+            net.setHalideScheduler(halide_scheduler);
+        }
+
+        MatShape netInputShape = shape(1, 3, inHeight, inWidth);
+        size_t weightsMemory = 0, blobsMemory = 0;
+        net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
+        int64 flops = net.getFLOPS(netInputShape);
+
+        net.forward(outputLayer); // warmup
+
+        std::cout << "Memory consumption:" << std::endl;
+        std::cout << "    Weights(parameters): " << divUp(weightsMemory, 1u<<20) << " Mb" << std::endl;
+        std::cout << "    Blobs: " << divUp(blobsMemory, 1u<<20) << " Mb" << std::endl;
+        std::cout << "Calculation complexity: " << flops * 1e-9 << " GFlops" << std::endl;
+
+        PERF_SAMPLE_BEGIN()
+            net.forward();
+        PERF_SAMPLE_END()
+
+        SANITY_CHECK_NOTHING();
+    }
+};
+
+
+PERF_TEST_P_(DNNTestNetwork, AlexNet)
+{
+    processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
+            "alexnet.yml", 227, 227, "prob", "caffe");
+}
+
+PERF_TEST_P_(DNNTestNetwork, GoogLeNet)
+{
+    processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
+            "", 224, 224, "prob", "caffe");
+}
+
+PERF_TEST_P_(DNNTestNetwork, ResNet50)
+{
+    processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
+            "resnet_50.yml", 224, 224, "prob", "caffe");
+}
+
+PERF_TEST_P_(DNNTestNetwork, SqueezeNet_v1_1)
+{
+    processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt",
+            "squeezenet_v1_1.yml", 227, 227, "prob", "caffe");
+}
+
+PERF_TEST_P_(DNNTestNetwork, Inception_5h)
+{
+    processNet("dnn/tensorflow_inception_graph.pb", "",
+            "inception_5h.yml",
+            224, 224, "softmax2", "tensorflow");
+}
+
+PERF_TEST_P_(DNNTestNetwork, ENet)
+{
+    processNet("dnn/Enet-model-best.net", "", "enet.yml",
+            512, 256, "l367_Deconvolution", "torch");
+}
+
+
+INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork,
+    testing::Combine(
+        ::testing::Values(TEST_DNN_BACKEND),
+        DNNTarget::all()
+    )
+);
+
+} // namespace
index 5cdbc6d..38e7d61 100644 (file)
@@ -1,11 +1,3 @@
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-declarations"
-#  if defined __clang__ || defined __APPLE__
-#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
-#    pragma GCC diagnostic ignored "-Wextra"
-#  endif
-#endif
-
 #ifndef __OPENCV_PERF_PRECOMP_HPP__
 #define __OPENCV_PERF_PRECOMP_HPP__
 
@@ -14,4 +6,9 @@
 #include <opencv2/highgui.hpp>
 #include <opencv2/dnn.hpp>
 
+using namespace cvtest;
+using namespace perf;
+using namespace cv;
+using namespace dnn;
+
 #endif
index 9923cf3..1a22882 100644 (file)
@@ -216,7 +216,7 @@ public:
                 shape.push_back((int)_shape.dim(i));
         }
         else
-            CV_Error(Error::StsError, "Unknown shape of input blob");
+            shape.resize(1, 1);  // Is a scalar.
     }
 
     void blobFromProto(const caffe::BlobProto &pbBlob, cv::Mat &dstBlob)
@@ -274,9 +274,9 @@ public:
     struct BlobNote
     {
         BlobNote(const std::string &_name, int _layerId, int _outNum) :
-            name(_name.c_str()), layerId(_layerId), outNum(_outNum) {}
+            name(_name), layerId(_layerId), outNum(_outNum) {}
 
-        const char *name;
+        std::string name;
         int layerId, outNum;
     };
 
@@ -293,14 +293,13 @@ public:
         addedBlobs.reserve(layersSize + 1);
 
         //setup input layer names
+        std::vector<String> netInputs(net.input_size());
         {
-            std::vector<String> netInputs(net.input_size());
             for (int inNum = 0; inNum < net.input_size(); inNum++)
             {
                 addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum));
                 netInputs[inNum] = net.input(inNum);
             }
-            dstNet.setInputsNames(netInputs);
         }
 
         for (int li = 0; li < layersSize; li++)
@@ -317,6 +316,13 @@ public:
             if (repetitions)
                 name += String("_") + toString(repetitions);
 
+            if (type == "Input")
+            {
+                addedBlobs.push_back(BlobNote(name, 0, netInputs.size()));
+                netInputs.push_back(name);
+                continue;
+            }
+
             int id = dstNet.addLayer(name, type, layerParams);
 
             for (int inNum = 0; inNum < layer.bottom_size(); inNum++)
@@ -325,6 +331,7 @@ public:
             for (int outNum = 0; outNum < layer.top_size(); outNum++)
                 addOutput(layer, id, outNum);
         }
+        dstNet.setInputsNames(netInputs);
 
         addedBlobs.clear();
     }
index 64bb85f..424e842 100644 (file)
@@ -875,7 +875,7 @@ struct Net::Impl
 
         if (preferableBackend == DNN_BACKEND_DEFAULT)
         {
-            CV_Assert(preferableTarget == DNN_TARGET_CPU);
+            CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
             return;
         }
 
@@ -1000,6 +1000,7 @@ struct Net::Impl
         Ptr<Layer> layerPtr = ld.getLayerInstance();
         {
             layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
+            layerPtr->preferableTarget = preferableTarget;
 #if 0
             std::cout << "\toutputs:";
             size_t noutputs = ld.outputBlobs.size();
@@ -1026,7 +1027,7 @@ struct Net::Impl
 
     void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
     {
-        if( !fusion || preferableBackend == DNN_BACKEND_HALIDE )
+        if( !fusion || !(preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_CPU))
             return;
 
         CV_TRACE_FUNCTION();
@@ -1236,7 +1237,6 @@ struct Net::Impl
         }
 
         layersTimings.resize(lastLayerId + 1, 0);
-
         fuseLayers(blobsToKeep_);
     }
 
@@ -1402,7 +1402,7 @@ struct Net::Impl
         }
         else
         {
-            CV_Assert(preferableTarget == DNN_TARGET_CPU);
+            CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
         }
         return ld.outputBlobs[pin.oid];
     }
@@ -1963,12 +1963,12 @@ int64 Net::getPerfProfile(std::vector<double>& timings)
 
 Importer::~Importer() {}
 
-Layer::Layer() {}
+Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }
 
 Layer::Layer(const LayerParams &params)
     : blobs(params.blobs), name(params.name), type(params.type)
 {
-
+    preferableTarget = DNN_TARGET_CPU;
 }
 
 void Layer::setParamsFrom(const LayerParams &params)
index 06f4502..1943b40 100644 (file)
@@ -83,6 +83,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(Concat,         ConcatLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Reshape,        ReshapeLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Flatten,        FlattenLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(ResizeNearestNeighbor, ResizeNearestNeighborLayer);
 
     CV_DNN_REGISTER_LAYER_CLASS(Convolution,    ConvolutionLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Deconvolution,  DeconvolutionLayer);
@@ -96,6 +97,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(ReLU,           ReLULayer);
     CV_DNN_REGISTER_LAYER_CLASS(ReLU6,          ReLU6Layer);
     CV_DNN_REGISTER_LAYER_CLASS(ChannelsPReLU,  ChannelsPReLULayer);
+    CV_DNN_REGISTER_LAYER_CLASS(PReLU,          ChannelsPReLULayer);
     CV_DNN_REGISTER_LAYER_CLASS(Sigmoid,        SigmoidLayer);
     CV_DNN_REGISTER_LAYER_CLASS(TanH,           TanHLayer);
     CV_DNN_REGISTER_LAYER_CLASS(ELU,            ELULayer);
@@ -106,6 +108,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool,      MaxUnpoolLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Dropout,        BlankLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Identity,       BlankLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Silence,        BlankLayer);
 
     CV_DNN_REGISTER_LAYER_CLASS(Crop,           CropLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Eltwise,        EltwiseLayer);
@@ -119,6 +122,8 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(Shift,          ShiftLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Padding,        PaddingLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Scale,          ScaleLayer);
+
+    CV_DNN_REGISTER_LAYER_CLASS(LSTM,           LSTMLayer);
 }
 
 CV__DNN_EXPERIMENTAL_NS_END
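The same factory is reachable from user code; cv::dnn::LayerFactory::registerLayer is the public counterpart of the macro used above. A sketch with a hypothetical constructor function:

    static cv::Ptr<cv::dnn::Layer> createInterpLayer(cv::dnn::LayerParams& params);

    // after this call, importers that meet the type string "Interp" instantiate it
    cv::dnn::LayerFactory::registerLayer("Interp", createInterpLayer);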
index 67d82c2..6833b04 100644 (file)
@@ -43,6 +43,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "op_halide.hpp"
+#include "opencl_kernels_dnn.hpp"
 
 namespace cv
 {
@@ -174,11 +175,62 @@ public:
         }
     };
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        CV_TRACE_FUNCTION();
+        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+        int cAxis = clamp(axis, inputs[0]->dims);
+        if (!(cAxis == 1 && outputs[0].dims == 4 && !padding))
+            return false;
+
+        int bottom_concat_axis;
+        int concat_size = inputs[0]->size[2] * inputs[0]->size[3];
+        int top_concat_axis = outputs[0].size[1];
+        int offset_concat_axis = 0;
+        UMat inpMat, outMat;
+        outMat = outputs[0].getUMat(ACCESS_WRITE);
+
+        ocl::Kernel kernel;
+        String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0]->type()) + String(" ");
+        if (!kernel.create("concat", ocl::dnn::concat_oclsrc, buildopt))
+            return false;
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            inpMat = inputs[i]->getUMat(ACCESS_READ);
+            bottom_concat_axis = inputs[i]->size[1];
+            size_t nthreads = inputs[i]->total();
+
+            kernel.set(0, (int)nthreads);
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
+            kernel.set(2, (int)inputs[i]->size[0]);
+            kernel.set(3, (int)concat_size);
+            kernel.set(4, (int)top_concat_axis);
+            kernel.set(5, (int)bottom_concat_axis);
+            kernel.set(6, (int)offset_concat_axis);
+            kernel.set(7, ocl::KernelArg::PtrWriteOnly(outMat));
+
+            if (!kernel.run(1, &nthreads, NULL, false))
+                return false;
+
+            offset_concat_axis += bottom_concat_axis;
+        }
+
+        return true;
+    }
+#endif
+
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs, outputs, internals))
+
         int cAxis = clamp(axis, inputs[0]->dims);
         Mat& outMat = outputs[0];
 
index 68c71bc..d637f54 100644 (file)
 #include "opencv2/core/hal/intrin.hpp"
 #include <iostream>
 
+#ifdef HAVE_OPENCL
+using namespace cv::dnn::ocl4dnn;
+#endif
+
 namespace cv
 {
 namespace dnn
@@ -150,6 +154,11 @@ public:
     Ptr<BatchNormLayer> bnorm;
     Ptr<ScaleLayer> scaleLayer;
 
+#ifdef HAVE_OPENCL
+    Ptr<OCL4DNNConvSpatial<float> > convolutionOp;
+    std::vector<UMat> umat_blobs;
+#endif
+
     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const
     {
         Size out(outShape[3], outShape[2]);
@@ -252,24 +261,13 @@ public:
         }
 
         Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inpGroupCn);
-
+        Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width;
+        Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height;
         Halide::Expr kc = r.z;
-        if (group > 1)
+        for (int i = 1; i < group; ++i)
         {
-            int outCnBound = outGroupCn;
-            int inpChBound = inpGroupCn;
-            Halide::Expr shift = select(c < outCnBound, 0, inpChBound);
-            for (int i = 2; i < group; ++i)
-            {
-                outCnBound += outGroupCn;
-                inpChBound += inpGroupCn;
-                shift = select(c < outCnBound, shift, inpChBound);
-            }
-            kc += shift;
+            kc = select(c < outGroupCn * i, kc, inpGroupCn * i + r.z);
         }
-
-        Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width;
-        Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height;
         Halide::Expr topExpr = sum(padded_input(kx, ky, kc, n) *
                                    weights(r.x, r.y, r.z, c));
         if (hasBias())
@@ -278,7 +276,6 @@ public:
             topExpr += bias(c);
         }
         top(x, y, c, n) = topExpr;
-        Ptr<BackendNode> pp(new HalideBackendNode({ padded_input, top }));
         return Ptr<BackendNode>(new HalideBackendNode({ padded_input, top }));
 #endif  // HAVE_HALIDE
         return Ptr<BackendNode>();
@@ -314,15 +311,15 @@ public:
                          Size kernel, Size pad, Size stride, Size dilation,
                          const ActivationLayer* activ, int ngroups, int nstripes )
         {
-            CV_Assert( input.dims == 4 && output.dims == 4 &&
-                       input.size[0] == output.size[0] &&
-                       weights.rows == output.size[1] &&
-                       weights.cols == (input.size[1]/ngroups)*kernel.width*kernel.height &&
-                       input.type() == output.type() &&
-                       input.type() == weights.type() &&
-                       input.type() == CV_32F &&
-                       input.isContinuous() &&
-                       output.isContinuous() &&
+            CV_Assert( input.dims == 4 && output.dims == 4,
+                       input.size[0] == output.size[0],
+                       weights.rows == output.size[1],
+                       weights.cols == (input.size[1]/ngroups)*kernel.width*kernel.height,
+                       input.type() == output.type(),
+                       input.type() == weights.type(),
+                       input.type() == CV_32F,
+                       input.isContinuous(),
+                       output.isContinuous(),
                        biasvec.size() == (size_t)output.size[1]+2);
             ParallelConv p;
 
@@ -648,6 +645,42 @@ public:
         }
     };
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        int group = inputs[0]->size[1] / umat_blobs[0].size[1];
+
+        if (convolutionOp.empty())
+        {
+            OCL4DNNConvConfig config;
+            config.in_shape = shape(*inputs[0]);
+            config.out_shape = shape(outputs[0]);
+            config.kernel = kernel;
+            config.pad = pad;
+            config.stride = stride;
+            config.dilation = dilation;
+            config.group = group;
+            config.bias_term = hasBias();
+
+            convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
+        }
+
+        for (size_t ii = 0; ii < outputs.size(); ii++)
+        {
+            UMat inpMat, outMat;
+            inpMat = inputs[ii]->getUMat(ACCESS_READ);
+            outMat = outputs[ii].getUMat(ACCESS_WRITE);
+
+            int batch_size = inpMat.size[0];
+
+            if (!convolutionOp->Forward(inpMat, umat_blobs[0], hasBias() ? umat_blobs[1] : UMat(),
+                                        outMat, batch_size))
+               return false;
+        }
+        return true;
+    }
+#endif
+
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
     {
         CV_TRACE_FUNCTION();
@@ -661,6 +694,10 @@ public:
         int ngroups = inputs[0]->size[1]/blobs[0].size[1];
         CV_Assert(outputs[0].size[1] % ngroups == 0);
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs, outputs, internals))
+
         int k, outCn = blobs[0].size[0];
 
         if( weightsMat.empty() )
@@ -793,7 +830,7 @@ public:
         int inpH = inpShape[2];
         int inpW = inpShape[3];
         int outCn = outShape[1];
-        int ngroups = inpCn / blobs[0].size[1];
+        int ngroups = inpCn / blobs[0].size[0];
         int outGroupCn = outCn / ngroups;
         int ksize = outGroupCn * kernel.height * kernel.width;
         return shape(ksize, inpH * inpW);
@@ -804,7 +841,7 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
-        CV_Assert(!hasBias() || blobs[1].total() == (size_t)blobs[0].size[0]);
+        CV_Assert(!hasBias() || blobs[1].total() == (size_t)numOutput);
         CV_Assert(inputs.size() != 0);
 
         int inpCn = inputs[0][1];
@@ -813,12 +850,13 @@ public:
 
         int outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
         int outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
-        int outCn = blobs[0].size[0];
+        int outCn = numOutput;
 
-        int ngroups = inpCn / blobs[0].size[1];
+        CV_Assert(outCn % blobs[0].size[1] == 0);
+        int ngroups = outCn / blobs[0].size[1];
 
         CV_Assert(inpCn % ngroups == 0 && outCn % ngroups == 0);
-        CV_Assert(blobs[0].size[0] == outCn && blobs[0].size[1] == inpCn / ngroups);
+        CV_Assert(blobs[0].size[0] == inpCn);
 
         int dims[] = {inputs[0][0], outCn, outH, outW};
         outputs.resize(inputs.size(), shape(dims));
@@ -1073,7 +1111,7 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        int outCn = blobs[0].size[0];
+        int outCn = numOutput;
         int inpCn = inputs[0]->size[1];
         bool is1x1flag = is1x1();
         int nstripes = getNumThreads();
@@ -1086,9 +1124,9 @@ public:
 
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
-            int ngroups = inpCn / blobs[0].size[1];
-            int inpGroupCn = blobs[0].size[1];
-            int outGroupCn = outCn / ngroups;
+            int ngroups = outCn / blobs[0].size[1];
+            int inpGroupCn = inpCn / ngroups;
+            int outGroupCn = blobs[0].size[1];
             const Mat& inp = *inputs[ii];
             Mat& out = outputs[ii];
             int numImg = inp.size[0];
@@ -1126,18 +1164,16 @@ public:
 #ifdef HAVE_HALIDE
         Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
 
-        int inW, inH, inC, inN, outC = blobs[0].size[0];
+        int inW, inH, inC, inN;
         getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
-
-        if (inC / blobs[0].size[1] != 1)
-            CV_Error(cv::Error::StsNotImplemented,
-                     "Halide backend for Deconvolution with group > 1 is not implemented");
+        const int outGroupCn = blobs[0].size[1];
+        const int group = numOutput / outGroupCn;
+        const int inpGroupCn = blobs[0].size[0] / group;
 
         Halide::Var x("x"), y("y"), c("c"), n("n");
         Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
         Halide::Func padded_input(name + "_constant_exterior");
-        auto weights = wrapToHalideBuffer(blobs[0], {kernel.width,
-                                                     kernel.height, outC, inC});
+        auto weights = wrapToHalideBuffer(blobs[0]);
 
         Halide::Func dilated_input("dilated_input");
         dilated_input(x, y, c, n) = 0.0f;
@@ -1153,13 +1189,21 @@ public:
                                                           0, inC, 0, inN);
         padded_input(x, y, c, n) = bounded(x, y, c, n);
 
-        Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inC);
-        Halide::Expr topExpr = sum(
-            padded_input(x + pad.width - r.x, y + pad.height - r.y, r.z, n) *
-            weights(r.x, r.y, c, r.z));
+        Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inpGroupCn);
+        Halide::Expr kx = x + pad.width - r.x;
+        Halide::Expr ky = y + pad.height - r.y;
+        Halide::Expr kInC = r.z;
+        Halide::Expr kOutC = c;
+        for (int i = 1; i < group; ++i)
+        {
+            kInC = select(c < outGroupCn * i, kInC, inpGroupCn * i + r.z);
+            kOutC = select(c < outGroupCn * i, kOutC, c - outGroupCn * i);
+        }
+        Halide::Expr topExpr = sum(padded_input(kx, ky, kInC, n) *
+                                   weights(r.x, r.y, kOutC, kInC));
         if (hasBias())
         {
-            auto bias = wrapToHalideBuffer(blobs[1], {outC});
+            auto bias = wrapToHalideBuffer(blobs[1], {numOutput});
             topExpr += bias(c);
         }
         top(x, y, c, n) = topExpr;
@@ -1193,23 +1237,30 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, const Laye
                                l->pad.width, l->stride.height, l->stride.width, l->dilation.height,
                                l->dilation.width, l->padMode);
 
-    bool bias = params.get<bool>("bias_term", true);
-    int numOutput = params.get<int>("num_output");
+    l->numOutput = params.get<int>("num_output");
     int ngroups = params.get<int>("group", 1);
 
     l->adjustPad.height = params.get<int>("adj_h", 0);
     l->adjustPad.width = params.get<int>("adj_w", 0);
 
-    CV_Assert(numOutput % ngroups == 0);
-    CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
+    CV_Assert(l->numOutput % ngroups == 0);
     CV_Assert(l->adjustPad.width < l->stride.width &&
               l->adjustPad.height < l->stride.height);
 }
 
 Ptr<BaseConvolutionLayer> ConvolutionLayer::create(const LayerParams &params)
 {
-    Ptr<BaseConvolutionLayer> l(new ConvolutionLayerImpl);
+    ConvolutionLayerImpl* conv_ptr = new ConvolutionLayerImpl;
+    Ptr<BaseConvolutionLayer> l(conv_ptr);
     initConvDeconvLayerFromCaffe(l, params);
+
+#ifdef HAVE_OPENCL
+    size_t n = params.blobs.size();
+    conv_ptr->umat_blobs.resize(n);
+    for (int i = 0; i < n; i++)
+        conv_ptr->umat_blobs[i] = params.blobs[i].getUMat(ACCESS_READ);
+#endif
+
     return l;
 }
 
index 0b72326..505b9c7 100644 (file)
@@ -81,6 +81,8 @@ public:
 
     float _nmsThreshold;
     int _topK;
+    // Whether predicted bounding boxes are represented in YXHW instead of XYWH layout.
+    bool _locPredTransposed;
 
     enum { _numAxes = 4 };
     static const std::string _layerName;
@@ -148,6 +150,7 @@ public:
         _keepTopK = getParameter<int>(params, "keep_top_k");
         _confidenceThreshold = getParameter<float>(params, "confidence_threshold", 0, false, -FLT_MAX);
         _topK = getParameter<int>(params, "top_k", 0, false, -1);
+        _locPredTransposed = getParameter<bool>(params, "loc_pred_transposed", 0, false, false);
 
         getCodeType(params);
 
@@ -209,7 +212,7 @@ public:
             // Retrieve all location predictions
             std::vector<LabelBBox> allLocationPredictions;
             GetLocPredictions(locationData, num, numPriors, _numLocClasses,
-                              _shareLocation, allLocationPredictions);
+                              _shareLocation, _locPredTransposed, allLocationPredictions);
 
             // Retrieve all confidences
             GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
@@ -540,11 +543,14 @@ public:
     //    num_loc_classes: number of location classes. It is 1 if share_location is
     //      true; and is equal to number of classes needed to predict otherwise.
     //    share_location: if true, all classes share the same location prediction.
+    //    loc_pred_transposed: if true, the four bounding box values are
+    //                         represented as [y,x,height,width]; otherwise as [x,y,width,height].
     //    loc_preds: stores the location prediction, where each item contains
     //      location prediction for an image.
     static void GetLocPredictions(const float* locData, const int num,
                            const int numPredsPerClass, const int numLocClasses,
-                           const bool shareLocation, std::vector<LabelBBox>& locPreds)
+                           const bool shareLocation, const bool locPredTransposed,
+                           std::vector<LabelBBox>& locPreds)
     {
         locPreds.clear();
         if (shareLocation)
@@ -566,10 +572,20 @@ public:
                         labelBBox[label].resize(numPredsPerClass);
                     }
                     caffe::NormalizedBBox& bbox = labelBBox[label][p];
-                    bbox.set_xmin(locData[startIdx + c * 4]);
-                    bbox.set_ymin(locData[startIdx + c * 4 + 1]);
-                    bbox.set_xmax(locData[startIdx + c * 4 + 2]);
-                    bbox.set_ymax(locData[startIdx + c * 4 + 3]);
+                    if (locPredTransposed)
+                    {
+                        bbox.set_ymin(locData[startIdx + c * 4]);
+                        bbox.set_xmin(locData[startIdx + c * 4 + 1]);
+                        bbox.set_ymax(locData[startIdx + c * 4 + 2]);
+                        bbox.set_xmax(locData[startIdx + c * 4 + 3]);
+                    }
+                    else
+                    {
+                        bbox.set_xmin(locData[startIdx + c * 4]);
+                        bbox.set_ymin(locData[startIdx + c * 4 + 1]);
+                        bbox.set_xmax(locData[startIdx + c * 4 + 2]);
+                        bbox.set_ymax(locData[startIdx + c * 4 + 3]);
+                    }
                 }
             }
         }
index dee3fbb..eb93363 100644 (file)
 //M*/
 
 #include "../precomp.hpp"
+#include "layers_common.hpp"
 #include "op_halide.hpp"
 #include "opencv2/imgproc.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
+#include "opencl_kernels_dnn.hpp"
+#include <iostream>
 
 namespace cv
 {
@@ -158,6 +161,10 @@ public:
     {
         CV_TRACE_FUNCTION();
 
+        CV_OCL_RUN((this->preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   func.applyOCL(inputs, outputs, internals))
+
         for (size_t i = 0; i < inputs.size(); i++)
         {
             const Mat &src = *inputs[i];
@@ -191,6 +198,13 @@ public:
     bool run_parallel;
 };
 
+#ifdef HAVE_OPENCL
+static String oclGetTMacro(const UMat &m)
+{
+    return String("-DT=") + ocl::typeToStr(m.type()) + String(" ");
+}
+#endif
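
For reference, the helper above only builds a type define for the OpenCL program: for a CV_32F matrix, ocl::typeToStr yields "float", so the result is "-DT=float " and the kernel source can use T as its element type. A hedged one-liner showing the equivalent by hand:

    cv::UMat m(1, 1, CV_32F);
    cv::String opt = cv::String("-DT=") + cv::ocl::typeToStr(m.type()) + " ";
    // opt == "-DT=float "
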
+
 struct ReLUFunctor
 {
     typedef ReLULayer Layer;
@@ -230,6 +244,46 @@ struct ReLUFunctor
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool initKernel(ocl::Kernel &ker, const UMat &src) const
+    {
+        const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : "";
+        String buildopt = oclGetTMacro(src) + buildoptSlope;
+
+        if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))
+            return false;
+
+        if (slope != 0)
+            ker.set(3, (float)slope);
+
+        return true;
+    }
+
+    bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat src, dst;
+            inputs[i]->copyTo(src);
+            dst = outputs[i].getUMat(ACCESS_WRITE);
+            CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
+
+            ocl::Kernel ker;
+            CV_Assert(initKernel(ker, src));
+            ker.set(0, (int)src.total());
+            ker.set(1, ocl::KernelArg::PtrReadOnly(src));
+            ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+
+            size_t gSize = src.total();
+            CV_Assert(ker.run(1, &gSize, &wgSize, false));
+        }
+
+        return true;
+    }
+#endif
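
The dispatch in ElementWiseLayer::forward relies on CV_OCL_RUN returning early only when the guarded call succeeds; an applyOCL that returns false (as the stubbed functors below do) simply falls through to the CPU loop. A simplified sketch of that pattern, not the exact macro:

    static bool tryOclPath() { return false; }  // hypothetical OpenCL path; false = unsupported
    static void cpuPath()    { /* reference CPU implementation */ }

    static void forwardSketch(bool preferOcl)
    {
        if (preferOcl && tryOclPath())
            return;   // the OpenCL kernel handled everything
        cpuPath();    // otherwise fall back to the CPU loop
    }
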
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -293,6 +347,14 @@ struct ReLU6Functor
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        // TODO: implement OCL version
+        return false;
+    }
+#endif
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -320,6 +382,14 @@ struct TanHFunctor
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        // TODO: implement OCL version
+        return false;
+    }
+#endif
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -347,6 +417,14 @@ struct SigmoidFunctor
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        // TODO: implement OCL version
+        return false;
+    }
+#endif
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -376,6 +454,14 @@ struct ELUFunctor
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        // TODO: implement OCL version
+        return false;
+    }
+#endif
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -403,6 +489,14 @@ struct AbsValFunctor
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        // TODO: implement OCL version
+        return false;
+    }
+#endif
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -430,6 +524,14 @@ struct BNLLFunctor
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        // TODO: implement OCL version
+        return false;
+    }
+#endif
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -479,6 +581,14 @@ struct PowerFunctor
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        // TODO: implement OCL version
+        return false;
+    }
+#endif
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -524,18 +634,18 @@ struct ChannelsPReLUFunctor
             v_float32x4 s4 = v_setall_f32(s), z = v_setzero_f32();
             for( ; i <= len - 16; i += 16 )
             {
-                v_float32x4 x0 = v_load(ptr + i);
-                v_float32x4 x1 = v_load(ptr + i + 4);
-                v_float32x4 x2 = v_load(ptr + i + 8);
-                v_float32x4 x3 = v_load(ptr + i + 12);
+                v_float32x4 x0 = v_load(srcptr + i);
+                v_float32x4 x1 = v_load(srcptr + i + 4);
+                v_float32x4 x2 = v_load(srcptr + i + 8);
+                v_float32x4 x3 = v_load(srcptr + i + 12);
                 x0 = v_select(x0 >= z, x0, x0*s4);
                 x1 = v_select(x1 >= z, x1, x1*s4);
                 x2 = v_select(x2 >= z, x2, x2*s4);
                 x3 = v_select(x3 >= z, x3, x3*s4);
-                v_store(ptr + i, x0);
-                v_store(ptr + i + 4, x1);
-                v_store(ptr + i + 8, x2);
-                v_store(ptr + i + 12, x3);
+                v_store(dstptr + i, x0);
+                v_store(dstptr + i + 4, x1);
+                v_store(dstptr + i + 8, x2);
+                v_store(dstptr + i + 12, x3);
             }
         #endif
             for( ; i < len; i++ )
@@ -546,6 +656,14 @@ struct ChannelsPReLUFunctor
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        // TODO: implement OCL version
+        return false;
+    }
+#endif
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -636,8 +754,15 @@ Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
     return l;
 }
 
-Ptr<ChannelsPReLULayer> ChannelsPReLULayer::create(const LayerParams& params)
+Ptr<Layer> ChannelsPReLULayer::create(const LayerParams& params)
 {
+    CV_Assert(params.blobs.size() == 1);
+    if (params.blobs[0].total() == 1)
+    {
+        LayerParams reluParams = params;
+        reluParams.set("negative_slope", params.blobs[0].at<float>(0));
+        return ReLULayer::create(reluParams);
+    }
     Ptr<ChannelsPReLULayer> l(new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(params.blobs[0])));
     l->setParamsFrom(params);
 
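The early return above relies on a channel-shared PReLU (a single learned slope) being exactly leaky ReLU, so such models get routed to the cheaper ReLU layer. A hedged sketch of the same rerouting done by hand (the 0.25f slope is illustrative):

    cv::dnn::LayerParams lp;
    lp.set("negative_slope", 0.25f);   // the lone PReLU blob value becomes the slope
    cv::Ptr<cv::dnn::ReLULayer> relu = cv::dnn::ReLULayer::create(lp);
    // relu now computes x >= 0 ? x : 0.25f * x, identical to the shared-slope PReLU.
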
index fa49109..9ccb87b 100644 (file)
@@ -119,6 +119,8 @@ public:
         EltwiseOp op;
         int nstripes;
         const ActivationLayer* activ;
+        int channels;
+        size_t planeSize;
 
         EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(EltwiseLayer::PROD), nstripes(0), activ(0) {}
 
@@ -126,7 +128,7 @@ public:
                         const std::vector<float>& coeffs, EltwiseOp op,
                         const ActivationLayer* activ, int nstripes)
         {
-            CV_Assert(dst.dims == 4 && dst.type() == CV_32F && dst.isContinuous());
+            CV_Assert(1 < dst.dims && dst.dims <= 4, dst.type() == CV_32F, dst.isContinuous());
             CV_Assert(coeffs.empty() || coeffs.size() == (size_t)nsrcs);
 
             for( int i = 0; i < nsrcs; i++ )
@@ -142,6 +144,11 @@ public:
             p.dst = &dst;
             p.op = op;
             p.nstripes = nstripes;
+            p.channels = (dst.dims == 4 ? dst.size[1] : 1);
+            p.planeSize = (dst.dims >= 3 ? dst.size[dst.dims - 1] * dst.size[dst.dims - 2] :
+                                           dst.size[dst.dims - 1]);
+            CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize);
+
             bool simpleCoeffs = true;
             if( op == EltwiseLayer::SUM && !coeffs.empty() )
             {
@@ -162,13 +169,11 @@ public:
 
         void operator()(const Range& r) const
         {
-            size_t planeSize = dst->size[2]*dst->size[3];
             size_t total = dst->size[0]*planeSize;
             size_t stripeSize = (total + nstripes - 1)/nstripes;
             size_t stripeStart = r.start*stripeSize;
             size_t stripeEnd = std::min(r.end*stripeSize, total);
             int c, j, k, n = nsrcs;
-            int channels = dst->size[1];
             const float* coeffsptr = coeffs && !coeffs->empty() ? &coeffs->at(0) : 0;
             float* dstptr0 = dst->ptr<float>();
             int blockSize0 = 1 << 12, blockSize = blockSize0;
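
The relaxed assert above admits 2-D through 4-D blobs; channels and planeSize are derived so that total == size[0] * channels * planeSize holds in every case. A sketch of the same derivation on a plain shape vector (hypothetical helper, mirroring the logic above):

    #include <vector>
    #include <cstddef>

    // dims 2: N x C         -> channels = 1, planeSize = C
    // dims 3: N x H x W     -> channels = 1, planeSize = H * W
    // dims 4: N x C x H x W -> channels = C, planeSize = H * W
    static void splitShape(const std::vector<int>& s, int& channels, size_t& planeSize)
    {
        const int dims = (int)s.size();
        channels = (dims == 4) ? s[1] : 1;
        planeSize = (dims >= 3) ? (size_t)s[dims - 1] * s[dims - 2]
                                : (size_t)s[dims - 1];
    }
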
index 9bec3b0..6067b3f 100644 (file)
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "op_halide.hpp"
+#include "opencl_kernels_dnn.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 
+#ifdef HAVE_OPENCL
+using namespace cv::dnn::ocl4dnn;
+#endif
+
 namespace cv
 {
 namespace dnn
@@ -55,6 +60,11 @@ class FullyConnectedLayerImpl : public InnerProductLayer
 public:
     enum { VEC_ALIGN = 8 };
 
+#ifdef HAVE_OPENCL
+    Ptr<OCL4DNNInnerProduct<float> > innerProductOp;
+    std::vector<UMat> umat_blobs;
+#endif
+
     FullyConnectedLayerImpl(const LayerParams& params)
     {
         setParamsFrom(params);
@@ -84,6 +94,12 @@ public:
             biasMat = blobs[1] = blobs[1].reshape(1, 1);
         else
             biasMat = Mat::zeros(1, numOutput, weightsMat.type());
+
+#ifdef HAVE_OPENCL
+        size_t n = blobs.size();
+        umat_blobs.resize(n);
+        for (int i = 0; i < n; i++) umat_blobs[i] = blobs[i].getUMat(ACCESS_READ);
+#endif
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -91,14 +107,18 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &) const
     {
-        CV_Assert(inputs.size() > 0);
+        CV_Assert(inputs.size() == 1);
         CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
         CV_Assert(blobs[0].dims == 2);
 
         int cAxis = clamp(axis, inputs[0]);
-        int outerSize = total(inputs[0], 0, cAxis);
         int numOutput = blobs[0].size[0];
-        outputs.resize(inputs.size(), shape(outerSize, numOutput));
+        MatShape outShape(cAxis + 1);
+        for (int i = 0; i < cAxis; ++i)
+            outShape[i] = inputs[0][i];
+        outShape.back() = numOutput;
+
+        outputs.resize(inputs.size(), outShape);
 
         CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
         return false;
@@ -238,11 +258,78 @@ public:
         bool useAVX2;
     };
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(std::vector<Mat*> &input, std::vector<Mat> &output)
+    {
+        int axisCan = clamp(axis, input[0]->dims);
+        int numOutput = blobs[0].size[0];
+        int innerSize = blobs[0].size[1];
+        int outerSize = input[0]->total(0, axisCan);
+        bool ret = true;
+
+        if (innerProductOp.empty())
+        {
+            OCL4DNNInnerProductConfig config;
+            config.num_output = numOutput;
+            config.bias_term = bias;
+            config.M = outerSize;
+            config.K = innerSize;
+
+            innerProductOp = Ptr<OCL4DNNInnerProduct<float> >(new OCL4DNNInnerProduct<float>(config));
+        }
+
+        UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
+        for (size_t i = 0; i < input.size(); i++)
+        {
+            UMat srcMat, dstMat;
+            srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ);
+            dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE);
+            dstMat.setTo(0.0f);
+
+            if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat))
+            {
+                ret = false;
+                break;
+            }
+
+            if (bias && (outerSize > 1))
+            {
+                UMat& biases = umat_blobs[1];
+                cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
+            }
+        }
+
+        if (ret) return true;
+
+        UMat& weights = umat_blobs[0];
+        for (size_t i = 0; i < input.size(); i++)
+        {
+            UMat srcMat, dstMat;
+            srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ);
+            dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE);
+
+            cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);
+
+            if (bias)
+            {
+                UMat& biases = umat_blobs[1];
+                cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
+            }
+        }
+
+        return true;
+    }
+#endif
+
     void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(input, output))
+
         int axisCan = clamp(axis, input[0]->dims);
         int outerSize = input[0]->total(0, axisCan);
 
index 46170e9..ed8add9 100644 (file)
 #include "layers/layers_common.simd_declarations.hpp"
 #undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
 
+#ifdef HAVE_OPENCL
+#include "ocl4dnn.hpp"
+#endif
+
 namespace cv
 {
 namespace dnn
index 9890587..72a0f34 100644 (file)
@@ -59,7 +59,7 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr,
 
 #if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX
 
-#if !CV_FMA // AVX workaround
+#if !CV_FMA3 // AVX workaround
 #undef _mm256_fmadd_ps
 #define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b))
 #endif
index 9efb9b8..bba82aa 100644 (file)
@@ -53,8 +53,8 @@ public:
             norm = cv::norm(*inputs[0], NORM_L2);
         else
         {
-            pow(abs(*inputs[0]), pnorm, internals[0]);
-            norm = pow(sum(internals[0])[0], 1.0f / pnorm);
+            cv::pow(abs(*inputs[0]), pnorm, internals[0]);
+            norm = pow((float)sum(internals[0])[0], 1.0f / pnorm);
         }
         multiply(*inputs[0], 1.0f / (norm + epsilon), outputs[0]);
     }
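
The corrected branch computes the general p-norm, norm = (sum_i |x_i|^p)^(1/p), with the common L2 case short-circuited above it. A scalar sketch of the same formula:

    #include <cmath>
    #include <cstddef>

    static float pNorm(const float* x, size_t n, float p)
    {
        double acc = 0.0;
        for (size_t i = 0; i < n; ++i)
            acc += std::pow(std::fabs((double)x[i]), (double)p);  // sum |x_i|^p
        return (float)std::pow(acc, 1.0 / p);                     // (...)^(1/p)
    }
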
index aa7a7cb..62dde95 100644 (file)
 #include "opencv2/imgproc.hpp"
 #include "opencv2/dnn/shape_utils.hpp"
 #include "opencv2/core/hal/hal.hpp"
+#include "opencl_kernels_dnn.hpp"
 #include <algorithm>
 
+#ifdef HAVE_OPENCL
+using namespace cv::dnn::ocl4dnn;
+#endif
+
 namespace cv
 {
 namespace dnn
@@ -78,18 +83,64 @@ public:
         normBySize = params.get<bool>("norm_by_size", true);
     }
 
+#ifdef HAVE_OPENCL
+    Ptr<OCL4DNNLRN<float> > lrnOp;
+#endif
+
     virtual bool supportBackend(int backendId)
     {
         return backendId == DNN_BACKEND_DEFAULT ||
                backendId == DNN_BACKEND_HALIDE && haveHalide();
     }
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        if (lrnOp.empty())
+        {
+            OCL4DNNLRNConfig config;
+            config.lrn_type = type == CHANNEL_NRM ?
+                              LRNParameter_NormRegion_ACROSS_CHANNELS :
+                              LRNParameter_NormRegion_WITHIN_CHANNEL;
+
+            CHECK_EQ(size % 2, 1) << "LRN only supports odd values for local_size";
+            config.local_size = size;
+            config.alpha = alpha;
+            config.beta = beta;
+            config.k = bias;
+            CHECK_EQ(4, inputs[0]->dims) << "Input must have 4 axes, "
+                     << "corresponding to (num, channels, height, width)";
+            config.batch_size = inputs[0]->size[0];
+            config.channels = inputs[0]->size[1];
+            config.height = inputs[0]->size[2];
+            config.width = inputs[0]->size[3];
+            config.norm_by_size = normBySize;
+
+            lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));
+        }
+
+        UMat inpMat, outMat;
+        inpMat = inputs[0]->getUMat(ACCESS_READ);
+        outMat = outputs[0].getUMat(ACCESS_WRITE);
+
+        if (!lrnOp->Forward(inpMat, outMat))
+            return false;
+
+        return true;
+    }
+#endif
+
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
         CV_Assert(inputs.size() == outputs.size());
+
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs, outputs, internals))
+
         for (int i = 0; i < inputs.size(); i++)
         {
             CV_Assert(inputs[i]->dims == 4);
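
For context, the config fields map onto the usual across-channels LRN response: each activation is divided by (k + scale * sum of squares over the local_size nearest channels)^beta, where scale is alpha/local_size when norm_by_size is set and plain alpha otherwise. A scalar sketch for one output element (illustrative, not the ocl4dnn kernel):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // Across-channels LRN at one pixel: x holds that pixel's channel values.
    static float lrnAt(const std::vector<float>& x, int c, int localSize,
                       float alpha, float beta, float k, bool normBySize)
    {
        const int half = localSize / 2;
        const int c0 = std::max(0, c - half);
        const int c1 = std::min((int)x.size() - 1, c + half);
        float sumSq = 0.f;
        for (int i = c0; i <= c1; ++i)
            sumSq += x[i] * x[i];
        const float scale = normBySize ? alpha / localSize : alpha;
        return x[c] / std::pow(k + scale * sumSq, beta);
    }
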
index f5a6a52..393c847 100644 (file)
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 
-// Copyright (C) 2016, Intel Corporation, all rights reserved.
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 
 /*
@@ -24,14 +24,20 @@ public:
     PaddingLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
-        paddingDim = params.get<int>("padding_dim");
-        padding = params.get<int>("padding");
-        inputDims = params.get<int>("input_dims", 0);
-        index = params.get<int>("index", 0);
-        paddingValue = params.get<double>("value", 0);
-
-        if(paddingDim < 0 || padding < 0)
-            CV_Error(cv::Error::StsNotImplemented, "Negative padding and dim aren't supported");
+        paddingValue = params.get<float>("value", 0);
+        inputDims = params.get<int>("input_dims", -1);
+
+        CV_Assert(params.has("paddings"));
+        const DictValue& paddingsParam = params.get("paddings");
+        CV_Assert((paddingsParam.size() & 1) == 0);
+
+        paddings.resize(paddingsParam.size() / 2);
+        for (int i = 0; i < paddings.size(); ++i)
+        {
+            paddings[i].first = paddingsParam.get<int>(i * 2);  // Pad before.
+            paddings[i].second = paddingsParam.get<int>(i * 2 + 1);  // Pad after.
+            CV_Assert(paddings[i].first >= 0, paddings[i].second >= 0);
+        }
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -39,24 +45,48 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
-        outputs.clear();
-        for(int i = 0; i < inputs.size(); i++)
+        CV_Assert(inputs.size() == 1);
+        const MatShape& inpShape = inputs[0];
+        CV_Assert(inpShape.size() >= paddings.size());
+        CV_Assert(inputDims == -1 || inpShape.size() == inputDims || inpShape.size() > paddings.size());
+
+        outputs.resize(1, inpShape);
+        int offset = (inputDims == -1 ? 0 : (inpShape.size() > inputDims ? 1 : 0));
+        for (int i = 0; i < paddings.size(); ++i)
         {
-            MatShape shape = inputs[i];
-            int dim = getPadDim(shape);
-            CV_Assert(dim < shape.size());
+            outputs[0][offset + i] = inpShape[offset + i] + paddings[i].first + paddings[i].second;
+        }
+        return false;
+    }
 
-            shape[dim] += padding;
-            outputs.push_back(shape);
+    void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        // Compute dstRanges.
+        const MatSize& inpShape = inputs[0]->size;
+        dstRanges.resize(paddings.size());
+
+        int offset = 0;
+        if (inputDims != -1 && inputs[0]->dims != inputDims)
+        {
+            dstRanges.insert(dstRanges.begin(), Range::all());
+            offset = 1;
         }
 
-        return false;
+        for (int i = 0; i < paddings.size(); ++i)
+        {
+            dstRanges[offset + i].start = paddings[i].first;
+            dstRanges[offset + i].end = paddings[i].first + inpShape[offset + i];
+        }
+
+        // Add the rest of dimensions.
+        for (int i = dstRanges.size(); i < inputs[0]->dims; ++i)
+            dstRanges.push_back(Range::all());
     }
 
     virtual bool supportBackend(int backendId)
     {
         return backendId == DNN_BACKEND_DEFAULT ||
-               backendId == DNN_BACKEND_HALIDE && haveHalide();
+               backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4;
     }
 
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
@@ -64,50 +94,18 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        for(int i = 0; i < inputs.size(); i++)
-        {
-            outputs[i] = paddingValue;
-            const Mat& inp = *inputs[i];
-            Mat& out = outputs[i];
-            int dims = inp.dims;
-            MatShape inShape(inp.size.p, inp.size.p + dims);
-            MatShape outShape(out.size.p, out.size.p + dims);
-            int dim = getPadDim(inShape);
-
-            int actualIndex = index;
-            if(index == 0)
-                actualIndex = inShape[dim];
-
-            std::vector<std::pair<Range, Range> > srcDstRanges;
-            srcDstRanges.push_back(std::make_pair(Range(0, actualIndex), Range(0, actualIndex)));
-            srcDstRanges.push_back(std::make_pair(Range(actualIndex, inShape[dim]),
-                                                  Range(actualIndex + padding, outShape[dim])));
-
-            std::vector<Range> srcRanges(dims, Range::all()), dstRanges = srcRanges;
-
-            for(int j = 0; j < srcDstRanges.size(); j++)
-            {
-                if(!srcDstRanges[j].first.empty())
-                {
-                    srcRanges[dim] = srcDstRanges[j].first;
-                    dstRanges[dim] = srcDstRanges[j].second;
-                    Mat dst = out(&dstRanges[0]);
-                    Mat src = inp(&srcRanges[0]).clone();
-                    src.copyTo(dst);
-                }
-            }
-        }
-    }
-
-    int getPadDim(const MatShape& shape) const
-    {
-        return inputDims > 0 && (int)shape.size() > inputDims ? paddingDim + 1 : paddingDim;
+        outputs[0].setTo(paddingValue);
+        inputs[0]->copyTo(outputs[0](dstRanges));
     }
 
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
     {
 #ifdef HAVE_HALIDE
         int inW, inH, inC, inN;
+        int minN = std::max(dstRanges[0].start, 0);
+        int minC = std::max(dstRanges[1].start, 0);
+        int minY = std::max(dstRanges[2].start, 0);
+        int minX = std::max(dstRanges[3].start, 0);
         Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
         getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
 
@@ -115,13 +113,16 @@ public:
         Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
         Halide::Func padded =
             Halide::BoundaryConditions::constant_exterior(inputBuffer, paddingValue);
-        top(x, y, c, n) = padded(x, y, c, n);
+        top(x, y, c, n) = padded(x - minX, y - minY, c - minC, n - minN);
         return Ptr<BackendNode>(new HalideBackendNode(top));
 #endif  // HAVE_HALIDE
         return Ptr<BackendNode>();
     }
 
-    int paddingDim, padding, inputDims, index;
+private:
+    std::vector<std::pair<int, int> > paddings;  // One (pad before, pad after) pair per dimension.
+    std::vector<Range> dstRanges;
+    int inputDims;
     float paddingValue;
 };
 
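The rewritten layer takes a flat "paddings" list of interleaved (before, after) amounts, one pair per leading dimension, so an input of shape [1, 3, 5, 5] with paddings = [0, 0, 0, 0, 1, 1, 2, 2] yields [1, 3, 7, 9]. A sketch of the shape rule (hypothetical helper; the inputDims offset handling above is omitted here):

    #include <utility>
    #include <vector>

    static std::vector<int> paddedShape(const std::vector<int>& in,
                                        const std::vector<std::pair<int,int> >& pads)
    {
        std::vector<int> out = in;  // pairs apply to the leading dims
        for (size_t i = 0; i < pads.size(); ++i)
            out[i] = in[i] + pads[i].first + pads[i].second;
        return out;
    }
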
index b54b52d..c27315b 100644 (file)
 #include "layers_common.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 #include "op_halide.hpp"
+#include "opencl_kernels_dnn.hpp"
 #include <float.h>
 #include <algorithm>
 using std::max;
 using std::min;
+#ifdef HAVE_OPENCL
+using namespace cv::dnn::ocl4dnn;
+#endif
 
 namespace cv
 {
@@ -81,6 +85,10 @@ public:
         ceilMode = params.get<bool>("ceil_mode", true);
     }
 
+#ifdef HAVE_OPENCL
+    Ptr<OCL4DNNPool<float> > poolOp;
+#endif
+
     void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
         CV_Assert(inputs.size() == 1);
@@ -104,11 +112,59 @@ public:
                 type == PoolingLayer::AVE && !pad.width && !pad.height);
     }
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        if (poolOp.empty())
+        {
+            OCL4DNNPoolConfig config;
+
+            config.in_shape = shape(*inputs[0]);
+            config.out_shape = shape(outputs[0]);
+            config.kernel = kernel;
+            config.pad = pad;
+            config.stride = stride;
+            config.channels = inputs[0]->size[1];
+            config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
+                                (type == AVE ? LIBDNN_POOLING_METHOD_AVE :
+                                               LIBDNN_POOLING_METHOD_STO);
+            poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
+        }
+
+        for (size_t ii = 0; ii < inputs.size(); ii++)
+        {
+            UMat inpMat, outMat, maskMat;
+
+            inpMat = inputs[ii]->getUMat(ACCESS_READ);
+
+            if (type == MAX)
+            {
+                outMat = outputs[2 * ii].getUMat(ACCESS_WRITE);
+                maskMat = outputs[2 * ii + 1].getUMat(ACCESS_WRITE);
+            } else {
+                outMat = outputs[ii].getUMat(ACCESS_WRITE);
+                maskMat = UMat();
+            }
+
+            CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
+
+            if (!poolOp->Forward(inpMat, outMat, maskMat))
+                return false;
+        }
+
+        return true;
+    }
+#endif
+
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs, outputs, internals))
+
         for (size_t ii = 0; ii < inputs.size(); ii++)
         {
             switch (type)
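
A note on the OpenCL branch above: for MAX pooling each input produces a pair of outputs, the pooled values at index 2*ii and an argmax mask at 2*ii + 1, which a later unpooling layer can consume; AVE and STO produce a single blob. A 1-D sketch of the value/mask pairing (illustrative, not the ocl4dnn kernel):

    #include <vector>

    static void maxPool1D(const std::vector<float>& src, int kernel, int stride,
                          std::vector<float>& dst, std::vector<float>& mask)
    {
        dst.clear(); mask.clear();
        for (int start = 0; start + kernel <= (int)src.size(); start += stride)
        {
            int best = start;
            for (int i = start + 1; i < start + kernel; ++i)
                if (src[i] > src[best]) best = i;
            dst.push_back(src[best]);
            mask.push_back((float)best);  // index of the chosen maximum, stored as float
        }
    }
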
index 8fa99ac..75831d0 100644 (file)
@@ -124,6 +124,20 @@ public:
         }
     }
 
+    void getScales(const LayerParams &params)
+    {
+        DictValue scalesParameter;
+        bool scalesRetrieved = getParameterDict(params, "scales", scalesParameter);
+        if (scalesRetrieved)
+        {
+            _scales.resize(scalesParameter.size());
+            for (int i = 0; i < scalesParameter.size(); ++i)
+            {
+                _scales[i] = scalesParameter.get<float>(i);
+            }
+        }
+    }
+
     void getVariance(const LayerParams &params)
     {
         DictValue varianceParameter;
@@ -169,13 +183,14 @@ public:
         _flip = getParameter<bool>(params, "flip");
         _clip = getParameter<bool>(params, "clip");
 
+        _scales.clear();
         _aspectRatios.clear();
-        _aspectRatios.push_back(1.);
 
         getAspectRatios(params);
         getVariance(params);
+        getScales(params);
 
-        _numPriors = _aspectRatios.size();
+        _numPriors = _aspectRatios.size() + 1;  // + 1 for an aspect ratio 1.0
 
         _maxSize = -1;
         if (params.has("max_size"))
@@ -231,6 +246,11 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        if (_scales.empty())
+            _scales.resize(_numPriors, 1.0f);
+        else
+            CV_Assert(_scales.size() == _numPriors);
+
         int _layerWidth = inputs[0]->size[3];
         int _layerHeight = inputs[0]->size[2];
 
@@ -256,7 +276,7 @@ public:
         {
             for (size_t w = 0; w < _layerWidth; ++w)
             {
-                _boxWidth = _boxHeight = _minSize;
+                _boxWidth = _boxHeight = _minSize * _scales[0];
 
                 float center_x = (w + 0.5) * stepX;
                 float center_y = (h + 0.5) * stepY;
@@ -272,7 +292,7 @@ public:
                 if (_maxSize > 0)
                 {
                     // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
-                    _boxWidth = _boxHeight = sqrt(_minSize * _maxSize);
+                    _boxWidth = _boxHeight = sqrt(_minSize * _maxSize) * _scales[1];
                     // xmin
                     outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
                     // ymin
@@ -284,15 +304,13 @@ public:
                 }
 
                 // rest of priors
+                CV_Assert((_maxSize > 0 ? 2 : 1) + _aspectRatios.size() == _scales.size());
                 for (size_t r = 0; r < _aspectRatios.size(); ++r)
                 {
                     float ar = _aspectRatios[r];
-                    if (fabs(ar - 1.) < 1e-6)
-                    {
-                        continue;
-                    }
-                    _boxWidth = _minSize * sqrt(ar);
-                    _boxHeight = _minSize / sqrt(ar);
+                    float scale = _scales[(_maxSize > 0 ? 2 : 1) + r];
+                    _boxWidth = _minSize * sqrt(ar) * scale;
+                    _boxHeight = _minSize / sqrt(ar) * scale;
                     // xmin
                     outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
                     // ymin
@@ -363,6 +381,7 @@ public:
 
     std::vector<float> _aspectRatios;
     std::vector<float> _variance;
+    std::vector<float> _scales;
 
     bool _flip;
     bool _clip;
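
With the new scales parameter, each cell emits one prior at aspect ratio 1 and size min_size, one more at sqrt(min_size * max_size) when max_size is given, and then one per remaining aspect ratio; _scales must supply exactly one factor per prior, in that order, or is filled with 1.0f when absent. A worked count (illustrative values):

    // min_size and max_size set, aspect ratios after flipping = {2, 0.5}:
    //   prior 0: ratio 1,   side = min_size                  * _scales[0]
    //   prior 1: ratio 1,   side = sqrt(min_size * max_size) * _scales[1]
    //   prior 2: ratio 2,   w = min_size * sqrt(2.0) * _scales[2], h = min_size / sqrt(2.0) * _scales[2]
    //   prior 3: ratio 0.5, w = min_size * sqrt(0.5) * _scales[3], h = min_size / sqrt(0.5) * _scales[3]
    // so the layer expects _scales.size() == 4 here.
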
index 10a6f74..a40bcc6 100644 (file)
@@ -90,6 +90,8 @@ class LSTMLayerImpl : public LSTMLayer
 
     bool useTimestampDim;
     bool produceCellOutput;
+    float forgetBias, cellClip;
+    bool useCellClip, usePeephole;
 
 public:
 
@@ -97,9 +99,40 @@ public:
         : numTimeStamps(0), numSamples(0)
     {
         setParamsFrom(params);
-        type = "LSTM";
-        useTimestampDim = true;
-        produceCellOutput = false;
+
+        if (!blobs.empty())
+        {
+            CV_Assert(blobs.size() >= 3);
+
+            blobs[2] = blobs[2].reshape(1, 1);
+
+            const Mat& Wh = blobs[0];
+            const Mat& Wx = blobs[1];
+            const Mat& bias = blobs[2];
+            CV_Assert(Wh.dims == 2 && Wx.dims == 2);
+            CV_Assert(Wh.rows == Wx.rows);
+            CV_Assert(Wh.rows == 4*Wh.cols);
+            CV_Assert(Wh.rows == (int)bias.total());
+            CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
+
+            // Peephole weights.
+            if (blobs.size() > 3)
+            {
+                CV_Assert(blobs.size() == 6);
+                for (int i = 3; i < 6; ++i)
+                {
+                    CV_Assert(blobs[i].rows == Wh.cols && blobs[i].cols == Wh.cols);
+                    CV_Assert(blobs[i].type() == bias.type());
+                }
+            }
+        }
+        useTimestampDim = params.get<bool>("use_timestamp_dim", true);
+        produceCellOutput = params.get<bool>("produce_cell_output", false);
+        forgetBias = params.get<float>("forget_bias", 0.0f);
+        cellClip = params.get<float>("cell_clip", 0.0f);
+        useCellClip = params.get<bool>("use_cell_clip", false);
+        usePeephole = params.get<bool>("use_peephole", false);
+
         allocated = false;
         outTailShape.clear();
     }
@@ -141,7 +174,7 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
-        CV_Assert(blobs.size() == 3);
+        CV_Assert(!usePeephole && blobs.size() == 3 || usePeephole && blobs.size() == 6);
         CV_Assert(inputs.size() == 1);
         const MatShape& inp0 = inputs[0];
 
@@ -186,7 +219,7 @@ public:
 
     void finalize(const std::vector<Mat*> &input, std::vector<Mat> &output)
     {
-        CV_Assert(blobs.size() == 3);
+        CV_Assert(!usePeephole && blobs.size() == 3 || usePeephole && blobs.size() == 6);
         CV_Assert(input.size() == 1);
         const Mat& inp0 = *input[0];
 
@@ -251,13 +284,27 @@ public:
             gemm(hInternal, Wh, 1, gates, 1, gates, GEMM_2_T);  //+Wh * h_{t-1}
             gemm(dummyOnes, bias, 1, gates, 1, gates);          //+b
 
-            Mat getesIFO = gates.colRange(0, 3*numOut);
             Mat gateI = gates.colRange(0*numOut, 1*numOut);
             Mat gateF = gates.colRange(1*numOut, 2*numOut);
             Mat gateO = gates.colRange(2*numOut, 3*numOut);
             Mat gateG = gates.colRange(3*numOut, 4*numOut);
 
-            sigmoid(getesIFO, getesIFO);
+            if (forgetBias)
+                add(gateF, forgetBias, gateF);
+
+            if (usePeephole)
+            {
+                Mat gatesIF = gates.colRange(0, 2*numOut);
+                gemm(cInternal, blobs[3], 1, gateI, 1, gateI);
+                gemm(cInternal, blobs[4], 1, gateF, 1, gateF);
+                sigmoid(gatesIF, gatesIF);
+            }
+            else
+            {
+                Mat gatesIFO = gates.colRange(0, 3*numOut);
+                sigmoid(gatesIFO, gatesIFO);
+            }
+
             tanh(gateG, gateG);
 
             //compute c_t
@@ -265,6 +312,17 @@ public:
             multiply(gateI, gateG, gateI);      // i_t (*) g_t
             add(gateF, gateI, cInternal);       // c_t = f_t (*) c_{t-1} + i_t (*) g_t
 
+            if (useCellClip)
+            {
+                min(cInternal, cellClip, cInternal);
+                max(cInternal, -cellClip, cInternal);
+            }
+            if (usePeephole)
+            {
+                gemm(cInternal, blobs[5], 1, gateO, 1, gateO);
+                sigmoid(gateO, gateO);
+            }
+
             //compute h_t
             tanh(cInternal, hInternal);
             multiply(gateO, hInternal, hInternal);
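
The extended time step above is the standard LSTM cell with the new options folded in: forget_bias is added to the forget gate before its sigmoid, the optional peephole weights (blobs 3..5) contribute matrix products with the cell state to the input, forget and output gates, and the cell state is clamped when use_cell_clip is set. Per time step, with [.] marking the optional peephole terms:

    i_t = sigmoid(x_t Wx_i + h_{t-1} Wh_i + b_i [+ c_{t-1} P_i])
    f_t = sigmoid(x_t Wx_f + h_{t-1} Wh_f + b_f + forget_bias [+ c_{t-1} P_f])
    g_t = tanh   (x_t Wx_g + h_{t-1} Wh_g + b_g)
    c_t = clamp(f_t * c_{t-1} + i_t * g_t, -cell_clip, cell_clip)
    o_t = sigmoid(x_t Wx_o + h_{t-1} Wh_o + b_o [+ c_t P_o])
    h_t = o_t * tanh(c_t)
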
diff --git a/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp b/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp
new file mode 100644 (file)
index 0000000..f302597
--- /dev/null
@@ -0,0 +1,71 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+#include <opencv2/imgproc.hpp>
+
+namespace cv { namespace dnn {
+
+class ResizeNearestNeighborLayerImpl : public ResizeNearestNeighborLayer
+{
+public:
+    ResizeNearestNeighborLayerImpl(const LayerParams& params)
+    {
+        setParamsFrom(params);
+        CV_Assert(params.has("width"), params.has("height"));
+        outWidth = params.get<float>("width");
+        outHeight = params.get<float>("height");
+        alignCorners = params.get<bool>("align_corners", false);
+        if (alignCorners)
+            CV_Error(Error::StsNotImplemented, "Nearest neighborhood resize with align_corners=true is not implemented");
+    }
+
+    bool getMemoryShapes(const std::vector<MatShape> &inputs,
+                         const int requiredOutputs,
+                         std::vector<MatShape> &outputs,
+                         std::vector<MatShape> &internals) const
+    {
+        CV_Assert(inputs.size() == 1, inputs[0].size() == 4);
+        outputs.resize(1, inputs[0]);
+        outputs[0][2] = outHeight;
+        outputs[0][3] = outWidth;
+        // We can work in-place (do nothing) if input shape == output shape.
+        return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
+    }
+
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        CV_TRACE_FUNCTION();
+        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+        if (outHeight == inputs[0]->size[2] && outWidth == inputs[0]->size[3])
+            return;
+
+        Mat& inp = *inputs[0];
+        Mat& out = outputs[0];
+        for (size_t n = 0; n < inputs[0]->size[0]; ++n)
+        {
+            for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch)
+            {
+                resize(getPlane(inp, n, ch), getPlane(out, n, ch),
+                       Size(outWidth, outHeight), 0, 0, INTER_NEAREST);
+            }
+        }
+    }
+private:
+    int outWidth, outHeight;
+    bool alignCorners;
+};
+
+
+Ptr<ResizeNearestNeighborLayer> ResizeNearestNeighborLayer::create(const LayerParams& params)
+{
+    return Ptr<ResizeNearestNeighborLayer>(new ResizeNearestNeighborLayerImpl(params));
+}
+
+}  // namespace dnn
+}  // namespace cv
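
A hedged usage sketch for the new layer (assuming ResizeNearestNeighborLayer is declared in the public dnn headers, as the factory above implies): it reads width, height and an optional align_corners flag from LayerParams and resizes each NCHW plane with INTER_NEAREST.

    cv::dnn::LayerParams lp;
    lp.set("width", 8);    // output W
    lp.set("height", 8);   // output H
    cv::Ptr<cv::dnn::Layer> layer = cv::dnn::ResizeNearestNeighborLayer::create(lp);
    // For a 1x3x4x4 input blob the layer produces a 1x3x8x8 output.
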
index 2d04c8a..f3c4a0c 100644 (file)
@@ -33,6 +33,7 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
+        CV_Assert(blobs.size() == 1 + hasBias);
         Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
         return true;
     }
@@ -48,8 +49,6 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        CV_Assert(blobs.size() == 1 + hasBias);
-
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
             Mat &inpBlob = *inputs[ii];
index 86313e3..c7db0f4 100644 (file)
@@ -56,14 +56,40 @@ public:
     {
         setParamsFrom(params);
         axis = params.get<int>("axis", 1);
-
         if (params.has("slice_point"))
         {
+            CV_Assert(!params.has("begin") && !params.has("size"));
             const DictValue &indicesValue = params.get("slice_point");
-            int i, n = indicesValue.size();
-            sliceIndices.resize(n);
-            for (i = 0; i < n; i++)
-                sliceIndices[i] = indicesValue.get<int>(i);
+            sliceRanges.resize(indicesValue.size() + 1,
+                               std::vector<Range>(axis + 1, Range::all()));
+            int prevSlice = 0;
+            for (int i = 0; i < indicesValue.size(); ++i)
+            {
+                sliceRanges[i][axis].start = prevSlice;
+                sliceRanges[i][axis].end = indicesValue.get<int>(i);
+                prevSlice = sliceRanges[i][axis].end;
+            }
+            sliceRanges.back()[axis].start = prevSlice;
+        }
+        else if (params.has("begin") && params.has("size"))
+        {
+            const DictValue &begins = params.get("begin");
+            const DictValue &sizes = params.get("size");
+            CV_Assert(begins.size() == sizes.size());
+
+            sliceRanges.resize(1);
+            sliceRanges[0].resize(begins.size(), Range::all());
+            for (int i = 0; i < begins.size(); ++i)
+            {
+                int start = begins.get<int>(i);
+                int size = sizes.get<int>(i);
+                CV_Assert(start >= 0);
+                CV_Assert(size == -1 || size > 0);  // -1 value means range [start, axis_size).
+
+                sliceRanges[0][i].start = start;
+                if (size > 0)
+                    sliceRanges[0][i].end = start + size;
+            }
         }
     }
 
@@ -73,47 +99,68 @@ public:
                             std::vector<MatShape> &internals) const
     {
         CV_Assert(inputs.size() == 1);
-
-        outputs.clear();
-
         MatShape inpShape = inputs[0];
-        int cAxis = clamp(axis, inpShape.size());
-        int axisSize = inpShape[cAxis];
 
-        if (sliceIndices.size()) //divide blob with respect to passed parameters
+        if (!sliceRanges.empty())
         {
-           std::vector<int> outAxisSize;
-           int prevSlice = 0;
-
-           for (size_t i = 0; i < sliceIndices.size(); i++)
-           {
-               if (!(prevSlice < sliceIndices[i] && sliceIndices[i] < axisSize))
-                   CV_Error(Error::StsBadArg, "Slice indices should be positive, increased and don't exceed size of sliced dimension");
-
-               outAxisSize.push_back(sliceIndices[i] - prevSlice);
-               prevSlice = sliceIndices[i];
-            }
-            outAxisSize.push_back(axisSize - prevSlice);
-
-            for (size_t i = 0; i < outAxisSize.size(); i++)
+            outputs.resize(sliceRanges.size(), inpShape);
+            for (int i = 0; i < outputs.size(); ++i)
             {
-               inpShape[cAxis] = outAxisSize[i];
-              outputs.push_back(inpShape);
+                CV_Assert(sliceRanges[i].size() <= inpShape.size());
+                for (int j = 0; j < sliceRanges[i].size(); ++j)
+                {
+                    outputs[i][j] = std::min(sliceRanges[i][j].end, inpShape[j]) -
+                                    std::max(sliceRanges[i][j].start, 0);
+                }
             }
         }
-        else //divide blob with respect to count of output blobs
+        else  // Divide input blob on equal parts by axis.
         {
-           CV_Assert(requiredOutputs > 0 && axisSize % requiredOutputs == 0);
-           int outAxisSize = axisSize / (int)requiredOutputs;
+            CV_Assert(0 <= axis && axis < inpShape.size());
+            CV_Assert(requiredOutputs > 0 && inpShape[axis] % requiredOutputs == 0);
+            inpShape[axis] /= requiredOutputs;
+            outputs.resize(requiredOutputs, inpShape);
+        }
+        return false;
+    }
 
-           for (size_t i = 0; i < requiredOutputs; i++)
+    void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        CV_Assert(inputs.size() == 1);
+        const MatSize& inpShape = inputs[0]->size;
+
+        if (sliceRanges.empty())
+        {
+            // Divide input blob on equal parts by axis.
+            int outAxisSize = inpShape[axis] / outputs.size();
+            sliceRanges.resize(outputs.size(),
+                               std::vector<Range>(axis + 1, Range::all()));
+            int prevSlice = 0;
+            for (int i = 0; i < outputs.size(); ++i)
             {
-               inpShape[cAxis] = outAxisSize;
-               outputs.push_back(inpShape);
+                sliceRanges[i][axis].start = prevSlice;
+                sliceRanges[i][axis].end = sliceRanges[i][axis].start + outAxisSize;
+                prevSlice = sliceRanges[i][axis].end;
             }
         }
+        else
+            CV_Assert(outputs.size() == sliceRanges.size());
 
-        return false;
+        for (int i = 0; i < outputs.size(); ++i)
+        {
+            CV_Assert(sliceRanges[i].size() <= inpShape[-1]);
+            // Clamp.
+            for (int j = 0; j < sliceRanges[i].size(); ++j)
+            {
+                sliceRanges[i][j].start = std::max(0, sliceRanges[i][j].start);
+                sliceRanges[i][j].end = std::min(sliceRanges[i][j].end, inpShape[j]);
+            }
+            // Fill the rest of ranges.
+            for (int j = sliceRanges[i].size(); j < inpShape[-1]; ++j)
+            {
+                sliceRanges[i].push_back(Range::all());
+            }
+        }
     }
 
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
@@ -122,15 +169,10 @@ public:
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
         const Mat& inpMat = *inputs[0];
-        std::vector<Range> ranges(inpMat.dims, Range::all());
-        int cAxis = clamp(axis, inpMat.dims);
-
-        ranges[cAxis].start = 0;
+        CV_Assert(outputs.size() == sliceRanges.size());
         for (size_t i = 0; i < outputs.size(); i++)
         {
-            ranges[cAxis].end = ranges[cAxis].start + outputs[i].size[cAxis];
-            inpMat(&ranges[0]).copyTo(outputs[i]);
-            ranges[cAxis].start = ranges[cAxis].end;
+            inpMat(sliceRanges[i]).copyTo(outputs[i]);
         }
     }
 };
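
The rewrite generalizes slicing to explicit ranges: Caffe-style slice_point values become consecutive ranges along axis, while the begin/size pair describes one multi-dimensional crop. A sketch of the slice_point conversion on plain ints (mirroring the constructor logic above):

    #include <utility>
    #include <vector>

    // slice_point {2, 5} on an axis of size 8 -> ranges [0,2), [2,5), [5,8).
    static std::vector<std::pair<int,int> > slicePointsToRanges(
            const std::vector<int>& points, int axisSize)
    {
        std::vector<std::pair<int,int> > ranges;
        int prev = 0;
        for (size_t i = 0; i < points.size(); ++i)
        {
            ranges.push_back(std::make_pair(prev, points[i]));
            prev = points[i];
        }
        ranges.push_back(std::make_pair(prev, axisSize));  // tail range
        return ranges;
    }
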
index 828557d..fd14e29 100644 (file)
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "op_halide.hpp"
+#include "opencl_kernels_dnn.hpp"
 #include <algorithm>
 #include <stdlib.h>
 using std::max;
+#ifdef HAVE_OPENCL
+using namespace cv::dnn::ocl4dnn;
+#endif
 
 namespace cv
 {
@@ -63,6 +67,10 @@ public:
         setParamsFrom(params);
     }
 
+#ifdef HAVE_OPENCL
+    Ptr<OCL4DNNSoftmax<float> > softmaxOp;
+#endif
+
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
                          const int requiredOutputs,
                          std::vector<MatShape> &outputs,
@@ -82,11 +90,91 @@ public:
                backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1;
     }
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        if (softmaxOp.empty())
+        {
+            OCL4DNNSoftmaxConfig config;
+
+            config.in_shape = shape(*inputs[0]);
+            config.axis = axisRaw;
+            config.channels = inputs[0]->size[axisRaw];
+
+            softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
+        }
+
+        UMat srcMat, dstMat;
+        srcMat = inputs[0]->getUMat(ACCESS_READ);
+        dstMat = outputs[0].getUMat(ACCESS_WRITE);
+
+        if (!logSoftMax && softmaxOp->Forward(srcMat, dstMat))
+            return true;
+
+        const Mat &src = *inputs[0];
+        UMat bufMat = internals[0].getUMat(ACCESS_WRITE);
+        srcMat.copyTo(dstMat);
+
+        int axis = clamp(axisRaw, src.dims);
+        size_t outerSize = src.total(0, axis);
+        size_t channels = src.size[axis];
+        size_t innerSize = src.total(axis + 1);
+
+        String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
+        ocl::Kernel kmax, ksub, ksum, kdiv;
+
+        if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (logSoftMax) buildOpts += " -DLOG_SOFTMAX ";
+        if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
+        size_t bufSize = internals[0].total();
+        size_t totalSize = src.total();
+
+        kmax.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!kmax.run(1, &bufSize, &wgSize, false))
+            return false;
+
+        ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!ksub.run(1, &totalSize, &wgSize, false))
+            return false;
+
+        cv::exp(dstMat, dstMat);
+
+        ksum.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!ksum.run(1, &bufSize, &wgSize, false))
+            return false;
+
+        kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!kdiv.run(1, &totalSize, &wgSize, false))
+            return false;
+
+        return true;
+    }
+#endif
+
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs, outputs, internals))
+
         const Mat &src = *inputs[0];
         Mat &dst = outputs[0];
 
diff --git a/modules/dnn/src/ocl4dnn/include/common.hpp b/modules/dnn/src/ocl4dnn/include/common.hpp
new file mode 100644 (file)
index 0000000..4146642
--- /dev/null
@@ -0,0 +1,62 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef _OPENCV_LIBDNN_COMMON_HPP_
+#define _OPENCV_LIBDNN_COMMON_HPP_
+#include "../../precomp.hpp"
+#include "../../caffe/glog_emulator.hpp"
+#include <opencv2/core/opencl/runtime/opencl_core.hpp>
+
+#ifdef HAVE_OPENCL
+
+// Macro to select the single (_float) or double (_double) precision kernel
+#define CL_KERNEL_SELECT(kernel) kernel "_float"
+
+#define OCL_CHECK(condition) \
+    do { \
+        cl_int error = (condition); \
+        CHECK_EQ(error, CL_SUCCESS) << " " << cv::ocl::getOpenCLErrorString(error); \
+    } while (0)
+
+bool clOptionSupport(cv::String option);
+
+#endif // HAVE_OPENCL
+#endif
diff --git a/modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp b/modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp
new file mode 100644 (file)
index 0000000..df3e321
--- /dev/null
@@ -0,0 +1,854 @@
+#ifndef _OPENCV_OCL4DNN_DEFAULT_KERNEL_CONFIG_HPP_
+#define _OPENCV_OCL4DNN_DEFAULT_KERNEL_CONFIG_HPP_
+const char *default_kernel_config_intel[] = {
+// Below is the information for OpenCL based on which these configurations tuned
+/*******************************************************************************
+Number of platforms                               1
+  Platform Name                                   Intel(R) OpenCL
+  Platform Vendor                                 Intel(R) Corporation
+  Platform Version                                OpenCL 2.0
+  Platform Profile                                FULL_PROFILE
+  Platform Extensions                             cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_device_side_avc_motion_estimation cl_intel_driver_diagnostics cl_intel_media_block_io cl_intel_motion_estimation cl_intel_planar_yuv cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
+  Platform Extensions function suffix             INTEL
+
+  Platform Name                                   Intel(R) OpenCL
+Number of devices                                 1
+  Device Name                                     Intel(R) HD Graphics
+  Device Vendor                                   Intel(R) Corporation
+  Device Vendor ID                                0x8086
+  Device Version                                  OpenCL 2.0
+  Driver Version                                  r4.1.61547
+  Device OpenCL C Version                         OpenCL C 2.0
+  Device Type                                     GPU
+  Device Profile                                  FULL_PROFILE
+  Max compute units                               72
+  Max clock frequency                             950MHz
+  Device Partition                                (core)
+    Max number of sub-devices                     0
+    Supported partition types                     by <unknown> (0x7FE000000000)
+  Max work item dimensions                        3
+  Max work item sizes                             256x256x256
+  Max work group size                             256
+  Preferred work group size multiple              32
+  Preferred / native vector sizes
+    char                                                16 / 16
+    short                                                8 / 8
+    int                                                  4 / 4
+    long                                                 1 / 1
+    half                                                 8 / 8        (cl_khr_fp16)
+    float                                                1 / 1
+    double                                               1 / 1        (cl_khr_fp64)
+  Half-precision Floating-point support           (cl_khr_fp16)
+    Denormals                                     Yes
+    Infinity and NANs                             Yes
+    Round to nearest                              Yes
+    Round to zero                                 Yes
+    Round to infinity                             Yes
+    IEEE754-2008 fused multiply-add               Yes
+    Support is emulated in software               No
+    Correctly-rounded divide and sqrt operations  No
+  Single-precision Floating-point support         (core)
+    Denormals                                     Yes
+    Infinity and NANs                             Yes
+    Round to nearest                              Yes
+    Round to zero                                 Yes
+    Round to infinity                             Yes
+    IEEE754-2008 fused multiply-add               Yes
+    Support is emulated in software               No
+    Correctly-rounded divide and sqrt operations  Yes
+  Double-precision Floating-point support         (cl_khr_fp64)
+    Denormals                                     Yes
+    Infinity and NANs                             Yes
+    Round to nearest                              Yes
+    Round to zero                                 Yes
+    Round to infinity                             Yes
+    IEEE754-2008 fused multiply-add               Yes
+    Support is emulated in software               No
+    Correctly-rounded divide and sqrt operations  No
+  Address bits                                    64, Little-Endian
+  Global memory size                              26887677543 (25.04GiB)
+  Error Correction support                        No
+  Max memory allocation                           4294959103 (4GiB)
+  Unified memory for Host and Device              Yes
+  Shared Virtual Memory (SVM) capabilities        (core)
+    Coarse-grained buffer sharing                 Yes
+    Fine-grained buffer sharing                   No
+    Fine-grained system sharing                   No
+    Atomics                                       No
+  Minimum alignment for any data type             128 bytes
+  Alignment of base address                       1024 bits (128 bytes)
+  Preferred alignment for atomics
+    SVM                                           64 bytes
+    Global                                        64 bytes
+    Local                                         64 bytes
+  Max size for global variable                    65536 (64KiB)
+  Preferred total size of global vars             4294959103 (4GiB)
+  Global Memory cache type                        Read/Write
+  Global Memory cache size                        1572864
+  Global Memory cache line                        64 bytes
+  Image support                                   Yes
+    Max number of samplers per kernel             16
+    Max size for 1D images from buffer            268434943 pixels
+    Max 1D or 2D image array size                 2048 images
+    Base address alignment for 2D image buffers   4 bytes
+    Pitch alignment for 2D image buffers          4 bytes
+    Max 2D image size                             16384x16384 pixels
+    Max 3D image size                             16384x16384x2048 pixels
+    Max number of read image args                 128
+    Max number of write image args                128
+    Max number of read/write image args           128
+  Max number of pipe args                         16
+  Max active pipe reservations                    1
+  Max pipe packet size                            1024
+  Local memory type                               Local
+  Local memory size                               65536 (64KiB)
+  Max constant buffer size                        4294959103 (4GiB)
+  Max number of constant args                     8
+  Max size of kernel argument                     1024
+  Queue properties (on host)
+    Out-of-order execution                        Yes
+    Profiling                                     Yes
+  Queue properties (on device)
+    Out-of-order execution                        Yes
+    Profiling                                     Yes
+    Preferred size                                131072 (128KiB)
+    Max size                                      67108864 (64MiB)
+  Max queues on device                            1
+  Max events on device                            1024
+  Prefer user sync for interop                    Yes
+  Profiling timer resolution                      83ns
+  Execution capabilities
+    Run OpenCL kernels                            Yes
+    Run native kernels                            No
+    SPIR versions                                 1.2
+  printf() buffer size                            4194304 (4MiB)
+  Built-in kernels                                block_motion_estimate_intel;block_advanced_motion_estimate_check_intel;block_advanced_motion_estimate_bidirectional_check_intel
+  Motion Estimation accelerator version        (Intel)   2
+  Device Available                                Yes
+  Compiler Available                              Yes
+  Linker Available                                Yes
+  Device Extensions                               cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_device_side_avc_motion_estimation cl_intel_driver_diagnostics cl_intel_media_block_io cl_intel_motion_estimation cl_intel_planar_yuv cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
+
+NULL platform behavior
+  clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)  No platform
+  clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)   No platform
+  clCreateContext(NULL, ...) [default]            No platform
+  clCreateContext(NULL, ...) [other]              Success [INTEL]
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_CPU)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_GPU)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_ACCELERATOR)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL)  No platform
+********************************************************************************/
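+// Reading the entries below (inferred from the key names themselves, not from
+// a documented spec): each string key encodes a convolution shape for this
+// device — EU<n> = GPU execution-unit count, k = kernel size, cn = input
+// channels, g = groups, s = stride, d = dilation, b = bias flag, in = input
+// spatial size, p = padding, num = batch size, M = output feature maps. The
+// paired value string holds the tuned work-group/blocking parameters that the
+// auto-tuner consumes at runtime for that shape.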
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","4 6 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","12 2 8 2 1 1 8 1 0 ",
+"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 2 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192","2 7 16 2 1 1 16 1 0 ",
+"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48","4 3 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","4 6 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
+"EU72_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64","4 1 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 3 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","4 6 8 2 1 1 8 1 0 ",
+"EU72_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4","14 1 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4","4 4 8 2 1 1 8 1 0 ",
+"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 2 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","1 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208","2 6 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384","1 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320","2 5 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","8 3 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","1 8 32 5 1 8 1 1 0 ",
+"EU72_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32","4 6 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn16_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M4","12 2 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64","2 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16","8 3 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn32_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M128","1 16 32 5 1 16 1 1 0 ",
+"EU72_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32","3 6 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32","1 16 32 5 1 16 1 1 0 ",
+"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M512","2 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384","2 7 16 2 1 1 16 1 0 ",
+"EU72_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4","1 1 1 4 1 1 1 0 1 ",
+"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96","4 5 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192","10 2 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192","6 4 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn4_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96","8 3 8 2 1 1 8 1 0 ",
+"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32","8 1 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384","4 7 8 2 1 1 8 1 0 ",
+"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256","2 6 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
+"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","4 4 16 2 1 1 16 1 0 ",
+"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48","4 3 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5","2 3 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24","8 2 8 2 1 1 8 1 0 ",
+"EU72_k3x3_cn128_g1_s1x1_d1x1_b0_in32x32_p1x1_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128","2 7 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M32","1 16 32 5 1 16 1 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112","8 2 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
+"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 3 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64","1 16 32 5 1 16 1 1 0 ",
+"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","8 2 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn16_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
+"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224","2 7 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","4 6 8 2 1 1 8 1 0 ",
+"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96","4 3 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192","10 2 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","12 2 8 2 1 1 8 1 0 ",
+"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128","2 5 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48","4 6 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","8 3 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn256_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","2 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288","2 5 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn1024_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M1024","1 16 32 5 1 16 1 1 0 ",
+"EU72_k1x1_cn2048_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M512","4 6 8 2 1 1 8 1 0 ",
+"EU72_k3x3_cn512_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M512","2 5 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16","8 2 8 2 1 1 8 1 0 ",
+"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","8 3 8 2 1 1 8 1 0 ",
+"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288","2 7 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16","2 5 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M4","8 3 8 2 1 1 8 1 0 ",
+"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256","2 7 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn256_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M256","2 5 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224","2 5 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU72_k2x2_cn16_g1_s2x2_d1x1_b0_in256x256_p0x0_num1_M16","6 4 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","4 6 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192","2 5 16 2 1 1 16 1 0 ",
+"EU72_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128","4 3 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","8 2 8 2 1 1 8 1 0 ",
+"EU72_k2x2_cn64_g1_s2x2_d1x1_b0_in128x128_p0x0_num1_M32","8 3 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M256","1 16 32 5 1 16 1 1 0 ",
+"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","12 2 8 2 1 1 8 1 0 ",
+"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32","4 2 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16","12 1 8 2 1 1 8 1 0 ",
+"EU72_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","4 7 8 2 1 1 8 1 0 ",
+"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 5 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16","12 1 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","8 3 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M512","1 16 32 5 1 16 1 1 0 ",
+"EU72_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","4 6 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96","1 16 32 5 1 16 1 1 0 ",
+"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","12 1 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","12 2 8 2 1 1 8 1 0 ",
+"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 7 16 2 1 1 16 1 0 ",
+"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24","12 1 8 2 1 1 8 1 0 ",
+"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 2 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","4 6 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144","1 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13","1 1 1 4 1 1 1 0 1 ",
+"EU72_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32","6 4 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","1 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn64_g1_s1x1_d1x1_b0_in64x64_p1x1_num1_M64","2 7 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn256_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M1024","2 8 32 5 1 8 1 1 0 ",
+"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32","4 6 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
+"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","4 6 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","8 3 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","12 2 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128","2 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
+"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112","1 8 32 5 1 8 1 1 0 ",
+"EU72_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2","1 3 16 2 1 1 16 1 0 ",
+"EU72_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M2048","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU72_k1x1_cn512_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M2048","1 8 32 5 1 8 1 1 0 ",
+"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","8 1 16 2 1 1 16 1 0 ",
+"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208","2 7 16 2 1 1 16 1 0 ",
+// Below is the OpenCL device information based on which these configurations were tuned
+/*******************************************************************************
+Number of platforms                               1
+  Platform Name                                   Intel(R) OpenCL
+  Platform Vendor                                 Intel(R) Corporation
+  Platform Version                                OpenCL 2.0
+  Platform Profile                                FULL_PROFILE
+  Platform Extensions                             cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_driver_diagnostics cl_intel_motion_estimation cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
+  Platform Extensions function suffix             INTEL
+
+  Platform Name                                   Intel(R) OpenCL
+Number of devices                                 1
+  Device Name                                     Intel(R) HD Graphics
+  Device Vendor                                   Intel(R) Corporation
+  Device Vendor ID                                0x8086
+  Device Version                                  OpenCL 2.0
+  Driver Version                                  16.5.56875
+  Device OpenCL C Version                         OpenCL C 2.0 ( using IGC )
+  Device Type                                     GPU
+  Device Profile                                  FULL_PROFILE
+  Max compute units                               48
+  Max clock frequency                             950MHz
+  Device Partition                                (core)
+    Max number of sub-devices                     0
+    Supported partition types                     by <unknown> (0x7F4B00000000)
+  Max work item dimensions                        3
+  Max work item sizes                             256x256x256
+  Max work group size                             256
+  Preferred work group size multiple              32
+  Preferred / native vector sizes
+    char                                                16 / 16
+    short                                                8 / 8
+    int                                                  4 / 4
+    long                                                 1 / 1
+    half                                                 8 / 8        (cl_khr_fp16)
+    float                                                1 / 1
+    double                                               1 / 1        (cl_khr_fp64)
+  Half-precision Floating-point support           (cl_khr_fp16)
+    Denormals                                     Yes
+    Infinity and NANs                             Yes
+    Round to nearest                              Yes
+    Round to zero                                 Yes
+    Round to infinity                             Yes
+    IEEE754-2008 fused multiply-add               Yes
+    Support is emulated in software               No
+    Correctly-rounded divide and sqrt operations  No
+  Single-precision Floating-point support         (core)
+    Denormals                                     Yes
+    Infinity and NANs                             Yes
+    Round to nearest                              Yes
+    Round to zero                                 Yes
+    Round to infinity                             Yes
+    IEEE754-2008 fused multiply-add               Yes
+    Support is emulated in software               No
+    Correctly-rounded divide and sqrt operations  Yes
+  Double-precision Floating-point support         (cl_khr_fp64)
+    Denormals                                     Yes
+    Infinity and NANs                             Yes
+    Round to nearest                              Yes
+    Round to zero                                 Yes
+    Round to infinity                             Yes
+    IEEE754-2008 fused multiply-add               Yes
+    Support is emulated in software               No
+    Correctly-rounded divide and sqrt operations  No
+  Address bits                                    64, Little-Endian
+  Global memory size                              13361912218 (12.44GiB)
+  Error Correction support                        No
+  Max memory allocation                           4294959103 (4GiB)
+  Unified memory for Host and Device              Yes
+  Shared Virtual Memory (SVM) capabilities        (core)
+    Coarse-grained buffer sharing                 Yes
+    Fine-grained buffer sharing                   No
+    Fine-grained system sharing                   No
+    Atomics                                       No
+  Minimum alignment for any data type             128 bytes
+  Alignment of base address                       1024 bits (128 bytes)
+  Preferred alignment for atomics
+    SVM                                           64 bytes
+    Global                                        64 bytes
+    Local                                         64 bytes
+  Max size for global variable                    65536 (64KiB)
+  Preferred total size of global vars             4294959103 (4GiB)
+  Global Memory cache type                        Read/Write
+  Global Memory cache size                        1048576
+  Global Memory cache line                        64 bytes
+  Image support                                   Yes
+    Max number of samplers per kernel             16
+    Max size for 1D images from buffer            268434943 pixels
+    Max 1D or 2D image array size                 2048 images
+    Base address alignment for 2D image buffers   4 bytes
+    Pitch alignment for 2D image buffers          4 bytes
+    Max 2D image size                             16384x16384 pixels
+    Max 3D image size                             16384x16384x2048 pixels
+    Max number of read image args                 128
+    Max number of write image args                128
+    Max number of read/write image args           128
+  Max number of pipe args                         16
+  Max active pipe reservations                    1
+  Max pipe packet size                            1024
+  Local memory type                               Local
+  Local memory size                               65536 (64KiB)
+  Max constant buffer size                        4294959103 (4GiB)
+  Max number of constant args                     8
+  Max size of kernel argument                     1024
+  Queue properties (on host)
+    Out-of-order execution                        Yes
+    Profiling                                     Yes
+  Queue properties (on device)
+    Out-of-order execution                        Yes
+    Profiling                                     Yes
+    Preferred size                                131072 (128KiB)
+    Max size                                      67108864 (64MiB)
+  Max queues on device                            1
+  Max events on device                            1024
+  Prefer user sync for interop                    Yes
+  Profiling timer resolution                      83ns
+  Execution capabilities
+    Run OpenCL kernels                            Yes
+    Run native kernels                            No
+    SPIR versions                                 1.2
+  printf() buffer size                            4194304 (4MiB)
+  Built-in kernels                                block_motion_estimate_intel;block_advanced_motion_estimate_check_intel;block_advanced_motion_estimate_bidirectional_check_intel
+  Motion Estimation accelerator version        (Intel)   2
+  Device Available                                Yes
+  Compiler Available                              Yes
+  Linker Available                                Yes
+  Device Extensions                               cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_driver_diagnostics cl_intel_motion_estimation cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
+
+NULL platform behavior
+  clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)  No platform
+  clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)   No platform
+  clCreateContext(NULL, ...) [default]            No platform
+  clCreateContext(NULL, ...) [other]              Success [INTEL]
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_CPU)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_GPU)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_ACCELERATOR)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL)  No platform
+********************************************************************************/
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 2 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn32_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M128","1 16 32 5 1 16 1 1 0 ",
+"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32","8 1 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96","1 16 32 5 1 16 1 1 0 ",
+"EU48_k3x3_cn128_g1_s1x1_d1x1_b0_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128","2 8 32 5 1 8 1 1 0 ",
+"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","8 1 16 2 1 1 16 1 0 ",
+"EU48_k2x2_cn16_g1_s2x2_d1x1_b0_in256x256_p0x0_num1_M16","2 7 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4","6 4 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M512","2 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112","8 3 8 2 1 1 8 1 0 ",
+"EU48_k3x3_cn512_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M512","2 7 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","2 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384","4 6 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16","8 2 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn256_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M1024","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","4 7 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320","2 7 16 2 1 1 16 1 0 ",
+"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48","4 2 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","2 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192","2 8 16 2 1 1 16 1 0 ",
+"EU48_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","12 1 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","12 2 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","8 2 8 2 1 1 8 1 0 ",
+"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192","2 7 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256","2 5 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn16_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M4","8 3 8 2 1 1 8 1 0 ",
+"EU48_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32","4 7 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","4 7 8 2 1 1 8 1 0 ",
+"EU48_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13","1 1 1 4 1 1 1 0 1 ",
+"EU48_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64","4 1 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96","8 3 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M16","1 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32","3 3 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96","2 8 32 5 1 8 1 1 0 ",
+"EU48_k2x2_cn64_g1_s2x2_d1x1_b0_in128x128_p0x0_num1_M32","4 4 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","4 3 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M128","2 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16","2 7 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn4_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128","6 2 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4","4 2 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M128","1 16 32 5 1 16 1 1 0 ",
+"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M2048","1 16 32 5 1 16 1 1 0 ",
+"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn16_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","4 7 8 2 1 1 8 1 0 ",
+"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192","2 5 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","12 2 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","2 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn2048_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M512","4 7 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","12 2 8 2 1 1 8 1 0 ",
+"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224","1 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 7 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 4 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288","2 4 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48","4 6 8 2 1 1 8 1 0 ",
+"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","8 1 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","12 2 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn256_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","2 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","4 6 8 2 1 1 8 1 0 ",
+"EU48_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128","4 5 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn256_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M256","2 6 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","8 3 8 2 1 1 8 1 0 ",
+"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48","4 2 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn64_g1_s1x1_d1x1_b0_in64x64_p1x1_num1_M64","10 2 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","4 6 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","4 5 8 2 1 1 8 1 0 ",
+"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208","2 5 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","4 6 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M2048","2 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48","4 6 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M256","1 16 32 5 1 16 1 1 0 ",
+"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224","2 7 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","1 8 32 5 1 8 1 1 0 ",
+"EU48_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32","2 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288","2 7 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192","2 7 16 2 1 1 16 1 0 ",
+"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32","4 3 16 2 1 1 16 1 0 ",
+"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96","4 2 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208","2 5 16 2 1 1 16 1 0 ",
+"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96","4 2 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24","12 1 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M16","4 7 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M512","2 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn1024_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","1 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320","2 8 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192","6 4 16 2 1 1 16 1 0 ",
+"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 3 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","4 6 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
+"EU48_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5","2 3 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","8 3 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64","1 16 32 5 1 16 1 1 0 ",
+"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","8 2 16 2 1 1 16 1 0 ",
+"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","4 6 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M32","1 16 32 5 1 16 1 1 0 ",
+"EU48_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
+"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","4 4 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","8 3 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","12 2 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 3 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M1024","1 8 32 5 1 8 1 1 0 ",
+"EU48_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4","1 1 1 4 1 1 1 0 1 ",
+"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256","2 7 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24","8 2 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16","12 1 8 2 1 1 8 1 0 ",
+"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128","10 2 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
+"EU48_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32","1 16 32 5 1 16 1 1 0 ",
+"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","4 7 8 2 1 1 8 1 0 ",
+"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16","12 2 8 2 1 1 8 1 0 ",
+"EU48_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2","1 4 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M4","8 2 8 2 1 1 8 1 0 ",
+"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
+"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16","1 8 32 5 1 8 1 1 0 ",
+"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 2 16 2 1 1 16 1 0 ",
+"EU48_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32","2 8 16 2 1 1 16 1 0 ",
+"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 2 16 2 1 1 16 1 0 ",
+// Below is the OpenCL device information based on which these configurations were tuned
+/*******************************************************************************
+Number of platforms                               1
+  Platform Name                                   Intel(R) OpenCL
+  Platform Vendor                                 Intel(R) Corporation
+  Platform Version                                OpenCL 2.0
+  Platform Profile                                FULL_PROFILE
+  Platform Extensions                             cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_device_side_avc_motion_estimation cl_intel_driver_diagnostics cl_intel_media_block_io cl_intel_motion_estimation cl_intel_planar_yuv cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
+  Platform Extensions function suffix             INTEL
+
+  Platform Name                                   Intel(R) OpenCL
+Number of devices                                 1
+  Device Name                                     Intel(R) HD Graphics
+  Device Vendor                                   Intel(R) Corporation
+  Device Vendor ID                                0x8086
+  Device Version                                  OpenCL 2.0
+  Driver Version                                  16.5.59288
+  Device OpenCL C Version                         OpenCL C 2.0
+  Device Type                                     GPU
+  Device Profile                                  FULL_PROFILE
+  Max compute units                               24
+  Max clock frequency                             1050MHz
+  Device Partition                                (core)
+    Max number of sub-devices                     0
+    Supported partition types                     by <unknown> (0x7F5100000000)
+  Max work item dimensions                        3
+  Max work item sizes                             256x256x256
+  Max work group size                             256
+  Preferred work group size multiple              32
+  Preferred / native vector sizes
+    char                                                16 / 16
+    short                                                8 / 8
+    int                                                  4 / 4
+    long                                                 1 / 1
+    half                                                 8 / 8        (cl_khr_fp16)
+    float                                                1 / 1
+    double                                               1 / 1        (cl_khr_fp64)
+  Half-precision Floating-point support           (cl_khr_fp16)
+    Denormals                                     Yes
+    Infinity and NANs                             Yes
+    Round to nearest                              Yes
+    Round to zero                                 Yes
+    Round to infinity                             Yes
+    IEEE754-2008 fused multiply-add               Yes
+    Support is emulated in software               No
+    Correctly-rounded divide and sqrt operations  No
+  Single-precision Floating-point support         (core)
+    Denormals                                     Yes
+    Infinity and NANs                             Yes
+    Round to nearest                              Yes
+    Round to zero                                 Yes
+    Round to infinity                             Yes
+    IEEE754-2008 fused multiply-add               Yes
+    Support is emulated in software               No
+    Correctly-rounded divide and sqrt operations  Yes
+  Double-precision Floating-point support         (cl_khr_fp64)
+    Denormals                                     Yes
+    Infinity and NANs                             Yes
+    Round to nearest                              Yes
+    Round to zero                                 Yes
+    Round to infinity                             Yes
+    IEEE754-2008 fused multiply-add               Yes
+    Support is emulated in software               No
+    Correctly-rounded divide and sqrt operations  No
+  Address bits                                    64, Little-Endian
+  Global memory size                              6588802663 (6.136GiB)
+  Error Correction support                        No
+  Max memory allocation                           3294401331 (3.068GiB)
+  Unified memory for Host and Device              Yes
+  Shared Virtual Memory (SVM) capabilities        (core)
+    Coarse-grained buffer sharing                 Yes
+    Fine-grained buffer sharing                   No
+    Fine-grained system sharing                   No
+    Atomics                                       No
+  Minimum alignment for any data type             128 bytes
+  Alignment of base address                       1024 bits (128 bytes)
+  Preferred alignment for atomics
+    SVM                                           64 bytes
+    Global                                        64 bytes
+    Local                                         64 bytes
+  Max size for global variable                    65536 (64KiB)
+  Preferred total size of global vars             3294401331 (3.068GiB)
+  Global Memory cache type                        Read/Write
+  Global Memory cache size                        524288
+  Global Memory cache line                        64 bytes
+  Image support                                   Yes
+    Max number of samplers per kernel             16
+    Max size for 1D images from buffer            205900083 pixels
+    Max 1D or 2D image array size                 2048 images
+    Base address alignment for 2D image buffers   4 bytes
+    Pitch alignment for 2D image buffers          4 bytes
+    Max 2D image size                             16384x16384 pixels
+    Max 3D image size                             16384x16384x2048 pixels
+    Max number of read image args                 128
+    Max number of write image args                128
+    Max number of read/write image args           128
+  Max number of pipe args                         16
+  Max active pipe reservations                    1
+  Max pipe packet size                            1024
+  Local memory type                               Local
+  Local memory size                               65536 (64KiB)
+  Max constant buffer size                        3294401331 (3.068GiB)
+  Max number of constant args                     8
+  Max size of kernel argument                     1024
+  Queue properties (on host)
+    Out-of-order execution                        Yes
+    Profiling                                     Yes
+  Queue properties (on device)
+    Out-of-order execution                        Yes
+    Profiling                                     Yes
+    Preferred size                                131072 (128KiB)
+    Max size                                      67108864 (64MiB)
+  Max queues on device                            1
+  Max events on device                            1024
+  Prefer user sync for interop                    Yes
+  Profiling timer resolution                      83ns
+  Execution capabilities
+    Run OpenCL kernels                            Yes
+    Run native kernels                            No
+    SPIR versions                                 1.2
+  printf() buffer size                            4194304 (4MiB)
+  Built-in kernels                                block_motion_estimate_intel;block_advanced_motion_estimate_check_intel;block_advanced_motion_estimate_bidirectional_check_intel
+  Motion Estimation accelerator version        (Intel)   2
+  Device Available                                Yes
+  Compiler Available                              Yes
+  Linker Available                                Yes
+  Device Extensions                               cl_intel_accelerator cl_intel_advanced_motion_estimation cl_intel_device_side_avc_motion_estimation cl_intel_driver_diagnostics cl_intel_media_block_io cl_intel_motion_estimation cl_intel_planar_yuv cl_intel_packed_yuv cl_intel_required_subgroup_size cl_intel_subgroups cl_intel_subgroups_short cl_intel_va_api_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_depth_images cl_khr_fp16 cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_mipmap_image cl_khr_mipmap_image_writes cl_khr_spir cl_khr_subgroups
+
+NULL platform behavior
+  clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)  No platform
+  clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)   No platform
+  clCreateContext(NULL, ...) [default]            No platform
+  clCreateContext(NULL, ...) [other]              Success [INTEL]
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_CPU)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_GPU)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_ACCELERATOR)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM)  No platform
+  clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL)  No platform
+********************************************************************************/
+"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","2 8 32 5 1 8 1 1 0 ",
+"EU24_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32","4 6 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 2 16 2 1 1 16 1 0 ",
+"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224","2 5 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU24_k2x2_cn16_g1_s2x2_d1x1_b0_in256x256_p0x0_num1_M16","1 8 32 5 1 8 1 1 0 ",
+"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 3 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn256_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","1 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384","1 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 7 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn2048_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16","2 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128","2 7 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","1 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224","2 7 16 2 1 1 16 1 0 ",
+"EU24_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208","2 7 16 2 1 1 16 1 0 ",
+"EU24_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","1 8 32 5 1 8 1 1 0 ",
+"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32","3 3 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24","8 3 8 2 1 1 8 1 0 ",
+"EU24_k3x3_cn128_g1_s1x1_d1x1_b0_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn1024_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M256","2 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208","2 7 16 2 1 1 16 1 0 ",
+"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128","4 3 16 2 1 1 16 1 0 ",
+"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48","4 2 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M2048","4 7 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192","6 4 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn256_g1_s1x1_d1x1_b0_in16x16_p0x0_num1_M1024","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn32_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M128","1 16 32 5 1 16 1 1 0 ",
+"EU24_k1x1_cn4_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M16","1 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384","2 7 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","4 6 8 2 1 1 8 1 0 ",
+"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128","4 4 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","2 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","8 2 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M2048","1 16 32 5 1 16 1 1 0 ",
+"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","4 3 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384","1 8 32 5 1 8 1 1 0 ",
+"EU24_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4","1 1 1 4 1 1 1 0 1 ",
+"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192","6 4 16 2 1 1 16 1 0 ",
+"EU24_k3x3_cn256_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M256","2 7 16 2 1 1 16 1 0 ",
+"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320","2 8 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M256","2 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256","2 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256","2 5 16 2 1 1 16 1 0 ",
+"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64","4 3 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16","8 3 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","2 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112","2 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M16","2 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M256","2 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128","1 8 32 5 1 8 1 1 0 ",
+"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32","4 2 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96","8 3 8 2 1 1 8 1 0 ",
+"EU24_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16","6 3 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112","1 8 32 5 1 8 1 1 0 ",
+"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96","4 3 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","8 2 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288","2 8 16 2 1 1 16 1 0 ",
+"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288","2 7 16 2 1 1 16 1 0 ",
+"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4","10 2 8 2 1 1 8 1 0 ",
+"EU24_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16","8 2 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
+"EU24_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32","4 7 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
+"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192","2 7 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32","4 6 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","4 6 8 2 1 1 8 1 0 ",
+"EU24_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32","1 8 32 5 1 8 1 1 0 ",
+"EU24_k2x2_cn64_g1_s2x2_d1x1_b0_in128x128_p0x0_num1_M32","2 8 32 5 1 8 1 1 0 ",
+"EU24_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128","4 3 16 2 1 1 16 1 0 ",
+"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48","8 1 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn16_g1_s1x1_d1x1_b0_in256x256_p0x0_num1_M4","8 3 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144","2 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128","6 4 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","2 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192","2 7 16 2 1 1 16 1 0 ",
+"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160","1 8 32 5 1 8 1 1 0 ",
+"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96","4 4 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","4 6 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32","2 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32","2 8 16 2 1 1 16 1 0 ",
+"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128","10 2 16 2 1 1 16 1 0 ",
+"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320","2 7 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32","8 3 8 2 1 1 8 1 0 ",
+"EU24_k3x3_cn64_g1_s1x1_d1x1_b0_in64x64_p1x1_num1_M64","2 8 16 2 1 1 16 1 0 ",
+"EU24_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5","2 3 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn16_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48","4 6 8 2 1 1 8 1 0 ",
+"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64","4 2 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in128x128_p0x0_num1_M4","8 2 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 2 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn64_g1_s1x1_d1x1_b0_in64x64_p0x0_num1_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48","4 6 8 2 1 1 8 1 0 ",
+"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256","2 7 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4","4 4 16 2 1 1 16 1 0 ",
+"EU24_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2","1 3 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96","1 8 32 5 1 8 1 1 0 ",
+"EU24_k3x3_cn512_g1_s1x1_d1x1_b0_in16x16_p1x1_num1_M512","2 7 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn512_g1_s2x2_d1x1_b0_in32x32_p0x0_num1_M1024","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64","4 1 16 2 1 1 16 1 0 ",
+"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192","6 4 16 2 1 1 16 1 0 ",
+"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64","1 16 32 5 1 16 1 1 0 ",
+"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16","8 3 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn128_g1_s1x1_d1x1_b0_in32x32_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn1024_g1_s2x2_d1x1_b0_in16x16_p0x0_num1_M512","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384","4 7 8 2 1 1 8 1 0 ",
+"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160","1 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64","8 3 8 2 1 1 8 1 0 ",
+"EU24_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13","1 1 1 4 1 1 1 0 1 ",
+"EU24_k1x1_cn256_g1_s2x2_d1x1_b0_in64x64_p0x0_num1_M512","2 8 32 5 1 8 1 1 0 ",
+"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24","8 3 8 2 1 1 8 1 0 ",
+"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32","4 3 16 2 1 1 16 1 0 ",
+};
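+
+// The keys above follow the format produced by OCL4DNNConvSpatial::generateKey():
+// for example, "EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192" encodes a
+// 3x3 kernel, 384 input channels, 2 groups, 1x1 stride and dilation, bias enabled,
+// a 16x16 tuning-size input, 1x1 padding, batch size 1 and M = 192 output channels
+// per group, tuned on a 24-EU device. The nine numbers in each value correspond to
+// the setupKernelByConfig() parameters (x, y, z, type, lx, ly, lz, swizzle,
+// nullLocal), where type 2/4/5 selects the IDLF/basic/GEMM-like kernel respectively.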
+#endif
diff --git a/modules/dnn/src/ocl4dnn/include/math_functions.hpp b/modules/dnn/src/ocl4dnn/include/math_functions.hpp
new file mode 100644 (file)
index 0000000..cac8604
--- /dev/null
@@ -0,0 +1,90 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef _OPENCV_GREENTEA_MATH_FUNCTIONS_HPP_
+#define _OPENCV_GREENTEA_MATH_FUNCTIONS_HPP_
+#include "../../precomp.hpp"
+#include "common.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+namespace ocl4dnn
+{
+
+#ifdef HAVE_OPENCL
+enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113};
+
+template<typename Dtype>
+bool ocl4dnnGEMMCommon(const CBLAS_TRANSPOSE TransB,
+                       const int32_t M, const int32_t N, const int32_t K,
+                       const UMat A, const UMat B,
+                       const UMat B_image, UMat C,
+                       const size_t max_image_size);
+
+template<typename Dtype>
+ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
+                                          bool is_matrix_a, bool transpose,
+                                          bool padding, int padded_height,
+                                          int padded_width, int height,
+                                          int width,  int ld);
+
+template<typename Dtype>
+bool ocl4dnnGEMV(const CBLAS_TRANSPOSE TransA,
+                 const int32_t M, const int32_t N, const Dtype alpha,
+                 const UMat A, const int32_t offA, const UMat x,
+                 const int32_t offx, const Dtype beta, UMat y,
+                 const int32_t offy);
+
+template<typename Dtype>
+bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
+                 const UMat x, const int32_t offx, UMat y,
+                 const int32_t offy);
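+
+// These helpers follow the usual BLAS conventions: ocl4dnnGEMV computes
+// y = alpha * op(A) * x + beta * y (only the non-transposed case is implemented
+// for float), and ocl4dnnAXPY computes y = alpha * x + y. The off* arguments
+// are element offsets into the corresponding UMats.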
+
+#endif  // HAVE_OPENCL
+
+} // namespace ocl4dnn
+} // namespace dnn
+} // namespace cv
+
+#endif
diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
new file mode 100644 (file)
index 0000000..09bda05
--- /dev/null
@@ -0,0 +1,476 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef _OPENCV_LIBDNN_HPP_
+#define _OPENCV_LIBDNN_HPP_
+#include "../../precomp.hpp"
+#include <iomanip>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include "common.hpp"
+
+namespace cv { namespace dnn { namespace ocl4dnn {
+#ifdef HAVE_OPENCL
+
+struct OCL4DNNConvConfig
+{
+    OCL4DNNConvConfig() :
+        kernel(1, 1),
+        pad(0, 0),
+        stride(1, 1),
+        dilation(1, 1),
+        group(1),
+        bias_term(false)
+    {}
+    MatShape in_shape;
+    MatShape out_shape;
+    Size kernel;
+    Size pad;
+    Size stride;
+    Size dilation;
+    int group; // = 1;
+    bool bias_term; // = false;
+};
+
+
+template<typename Dtype>
+class OCL4DNNConvSpatial
+{
+    public:
+        explicit OCL4DNNConvSpatial(OCL4DNNConvConfig config);
+        ~OCL4DNNConvSpatial();
+        bool Forward(const UMat& bottom_data, const UMat& weight,
+                     const UMat& bias,
+                     UMat& top_data, int32_t batch_size);
+
+    private:
+        struct kernelConfig
+        {
+            std::string kernelName;
+            float executionTime;
+            size_t local_work_size[3];
+            size_t global_work_size[3];
+            int32_t workItem_output[3];
+            bool verified;
+            bool tested;
+            bool swizzle_weights;
+            bool use_null_local;
+            int32_t kernelType;
+
+            kernelConfig()
+            {}
+
+            kernelConfig(const std::string& name, const size_t* global_size, const size_t* local_size,
+                         const int32_t* workItem,
+                         bool swizzle,
+                         int32_t type = 0)
+                : executionTime(0)
+            {
+                kernelName = name;
+                for (int32_t x = 0; x < 3; x++)
+                {
+                    local_work_size[x] = local_size ? local_size[x] : 1;
+                    global_work_size[x] = global_size[x];
+                    workItem_output[x] = workItem[x];
+                }
+                swizzle_weights = swizzle;
+                use_null_local = local_size == NULL;
+                verified = false;
+                tested = false;
+                kernelType = type;
+            }
+        };
+
+        struct tunerParam
+        {
+           int kernelType;
+           int blockWidth;
+           int blockHeight;
+           int blockDepth;
+
+           tunerParam(int type, int w, int h, int d)
+           {
+               kernelType = type;
+               blockWidth = w;
+               blockHeight = h;
+               blockDepth = d;
+           }
+        };
+
+        inline void addDef(const char* name)
+        {
+            options_ << " -D " << name;
+        }
+
+        inline void addDef(const char* name, const int value)
+        {
+            options_ << " -D " << name << "=" << value;
+        }
+
+        inline void addDef(const char* name, const float value)
+        {
+            options_ << " -D " << name << "=(float)" << value;
+        }
+
+        inline void addDef(const char* name, const double value)
+        {
+            options_ << " -D " << name << "=(double)" << value;
+        }
+
+        inline void addDef(const char* name, const char* value)
+        {
+            options_ << " -D " << name << "=" << value;
+        }
+
+        void useFirstAvailable(const UMat &bottom,
+                               UMat &top,
+                               const UMat &weight,
+                               const UMat &bias,
+                               int32_t numImages,
+                               UMat &verifyTop);
+        void setupKernel();
+        void collectCommonInformation();
+        void setupKernelDetails(int32_t kernelType,
+                                int32_t blockM,
+                                int32_t blockK,
+                                int32_t blockN);
+
+        ocl::Program compileKernel();
+        typedef std::map<std::string, ocl::Program> phash_t;
+        phash_t phash;
+        void calculateBenchmark(const UMat &bottom, UMat &verifyTop,
+                                const UMat &weight, const UMat &bias,
+                                int32_t numImages);
+
+
+        void setupConvolution(const UMat &bottom,
+                              UMat &top,
+                              const UMat &weight,
+                              const UMat &bias,
+                              int32_t numImages,
+                              UMat &verifyTop);
+        bool createConvolutionKernel(int32_t kernelType,
+                                     int32_t blockWidth,
+                                     int32_t blockHeight,
+                                     int32_t blockDepth);
+        bool setupIDLF(int32_t blockWidth,
+                       int32_t blockHeight,
+                       int32_t blockDepth);
+        bool createBasicKernel(int32_t blockWidth,
+                               int32_t blockHeight,
+                               int32_t blockDepth);
+        bool createGEMMLikeConvKernel(int32_t blockWidth,
+                                      int32_t blockHeight,
+                                      int32_t blockDepth);
+        void CreateSubBuffer(const UMat& buffer, UMat& sub_buffer,
+                             int32_t offset, int32_t size, bool write_only);
+        bool convolve(const UMat &bottom, UMat &top,
+                      const UMat &weight, const UMat &bias,
+                      int32_t numImages,
+                      kernelConfig* config,
+                      const cv::ocl::Queue& queue);
+        float timedConvolve(const UMat &bottom, UMat &top,
+                            const UMat &weight, const UMat &bias,
+                            int32_t numImages, kernelConfig* config);
+
+        bool verifyResult(const UMat &bottom,
+                          UMat &top,
+                          const UMat &weight,
+                          const UMat &bias,
+                          int32_t numImages,
+                          kernelConfig* config,
+                          UMat &verifyTop);
+
+        bool swizzleWeight(const UMat &weight,
+                           int32_t swizzled_factor,
+                           bool interleave = false);
+
+        void generateKey();
+        std::string generateSpecificKey(int32_t type, int32_t blockWidth,
+                                          int32_t blockHeight,
+                                          int32_t blockDepth);
+        void cacheTunedConfig();
+        bool loadTunedConfig();
+
+        void saveTunedConfig();
+        bool loadCachedConfig();
+
+        void unloadProgram(const std::string& kernelName);
+        void prepareKernel(const UMat &bottom, UMat &top,
+                           const UMat &weight, const UMat &bias,
+                           int32_t numImages);
+        bool setupKernelByConfig(int x, int y, int z, int type,
+                                 int lx, int ly, int lz,
+                                 bool swizzle, bool nullLocal);
+        void generateTunerItems(std::vector< cv::Ptr<tunerParam> > &tunerItems);
+
+        int32_t group_;
+        bool bias_term_;
+        UMat swizzled_weights_umat;
+
+        int32_t bottom_index_;
+        int32_t output_h_;
+        int32_t output_w_;
+        int32_t kernel_h_;
+        int32_t kernel_w_;
+        int32_t height_;
+        int32_t width_;
+        int32_t pad_h_;
+        int32_t pad_w_;
+        int32_t stride_h_;
+        int32_t stride_w_;
+        int32_t dilation_h_;
+        int32_t dilation_w_;
+
+        /// M_ is the channel dimension of the output for a single group, which is the
+        /// leading dimension of the filter matrix.
+        int32_t M_;
+
+        bool tuned_;
+        std::string key_, key_sanitized_;
+        std::string short_key_;
+        std::string kernel_name_;
+        std::string cache_path_;
+        bool use_cache_path_; // true if cache_path_ directory exists
+        bool force_auto_tuning_;
+        int32_t kernel_index_;
+        std::vector< cv::Ptr<kernelConfig> > kernelQueue;
+        cv::Ptr<kernelConfig> bestKernelConfig;
+
+        int32_t bottom_dim_;
+        int32_t top_dim_;
+        int32_t num_;
+        int32_t channels_;
+        int32_t num_output_;
+
+        int32_t kernelType_;
+        int32_t blockM_;
+        int32_t blockK_;
+        int32_t blockN_;
+        std::stringstream options_;
+        cv::ocl::ProgramSource src_;
+        int32_t prev_kernel_type_;
+};
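+
+// A minimal usage sketch (illustrative only; the shapes and layer parameters
+// below are made-up values, not taken from an actual network):
+//
+//   OCL4DNNConvConfig cfg;
+//   cfg.in_shape  = shape(1, 3, 224, 224);   // NCHW input
+//   cfg.out_shape = shape(1, 64, 112, 112);
+//   cfg.kernel = Size(7, 7); cfg.pad = Size(3, 3); cfg.stride = Size(2, 2);
+//   OCL4DNNConvSpatial<float> conv(cfg);
+//   conv.Forward(bottom, weights, bias, top, 1);  // batch size 1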
+
+typedef enum {
+    LIBDNN_POOLING_METHOD_MAX                 = 0,
+    LIBDNN_POOLING_METHOD_AVE                 = 1,
+    LIBDNN_POOLING_METHOD_STO                 = 2
+} ocl4dnnPoolingMethod_t;
+
+struct OCL4DNNPoolConfig
+{
+    OCL4DNNPoolConfig() :
+        kernel(1, 1),
+        pad(0, 0),
+        stride(1, 1),
+        dilation(1, 1),
+        channels(0),
+        pool_method(LIBDNN_POOLING_METHOD_MAX),
+        global_pooling(false)
+    {}
+    MatShape in_shape;
+    MatShape out_shape;
+    Size kernel;
+    Size pad;
+    Size stride;
+    Size dilation;
+
+    int channels;
+    ocl4dnnPoolingMethod_t pool_method; // = LIBDNN_POOLING_METHOD_MAX;
+    bool global_pooling; // = false;
+};
+
+template<typename Dtype>
+class OCL4DNNPool
+{
+    public:
+        explicit OCL4DNNPool(OCL4DNNPoolConfig config);
+        ~OCL4DNNPool();
+        bool Forward(const UMat& bottom_data,
+                     UMat& top_data,
+                     UMat& top_mask);
+    private:
+        UMat mask_idx_;
+
+        // Pooling parameters
+        std::vector<int32_t> pad_;
+        std::vector<int32_t> stride_;
+        std::vector<int32_t> kernel_shape_;
+        std::vector<int32_t> im_in_shape_;
+        std::vector<int32_t> im_out_shape_;
+
+        ocl4dnnPoolingMethod_t pool_method_;
+        int32_t count_;
+        int32_t batch_size_;
+        int32_t channels_;
+        int32_t kernel_h_;
+        int32_t kernel_w_;
+        int32_t stride_h_;
+        int32_t stride_w_;
+        int32_t pad_h_;
+        int32_t pad_w_;
+        int32_t height_;
+        int32_t width_;
+        int32_t pooled_height_;
+        int32_t pooled_width_;
+};
+
+struct OCL4DNNInnerProductConfig
+{
+    OCL4DNNInnerProductConfig() :
+        num_output(0), M(0), K(0),
+        bias_term(false), transpose(false), phase_test(true)
+    {}
+    int num_output;
+    int M;
+    int K;
+    bool bias_term;
+    bool transpose; // = false;
+    bool phase_test; // = true;
+};
+
+template<typename Dtype>
+class OCL4DNNInnerProduct
+{
+    public:
+        explicit OCL4DNNInnerProduct(OCL4DNNInnerProductConfig config);
+        ~OCL4DNNInnerProduct();
+        bool Forward(const UMat& bottom_data,
+                     const UMat& weight,
+                     const UMat& bias,
+                     UMat& top_data);
+    private:
+        OCL4DNNInnerProductConfig config_;
+        int32_t axis_;
+        int32_t num_output_;
+        int32_t M_;
+        int32_t N_;
+        int32_t K_;
+        bool bias_term_;
+        bool transpose_;
+        bool image_copied_;
+        bool phase_test_;
+};
+
+typedef enum {
+    LRNParameter_NormRegion_ACROSS_CHANNELS = 0,
+    LRNParameter_NormRegion_WITHIN_CHANNEL = 1
+} LRNParameter_NormRegion_WITHIN_CHANNEL_t;
+
+struct OCL4DNNLRNConfig
+{
+    OCL4DNNLRNConfig() :
+        lrn_type(LRNParameter_NormRegion_ACROSS_CHANNELS),
+        phase_test(true),
+        local_size(0), alpha(0.f), beta(0.f), k(0.f), norm_by_size(false),
+        batch_size(0), channels(0), height(0), width(0)
+    {}
+    MatShape in_shape;
+    LRNParameter_NormRegion_WITHIN_CHANNEL_t lrn_type;
+    bool phase_test; // = true;
+    int local_size;
+    float alpha;
+    float beta;
+    float k;
+    bool norm_by_size;
+    int32_t batch_size;
+    int32_t channels;
+    int32_t height;
+    int32_t width;
+};
+
+template<typename Dtype>
+class OCL4DNNLRN
+{
+    public:
+        explicit OCL4DNNLRN(OCL4DNNLRNConfig config);
+        bool Forward(const UMat& bottom_data, UMat& top_data);
+
+    private:
+        bool crossChannelForward(const UMat& bottom_data, UMat& top_data);
+        LRNParameter_NormRegion_WITHIN_CHANNEL_t lrn_type_;
+        bool phase_test_;
+        int32_t size_;
+        Dtype alpha_;
+        Dtype beta_;
+        Dtype k_;
+        int32_t num_;
+        int32_t channels_;
+        int32_t height_;
+        int32_t width_;
+        bool norm_by_size_;
+};
+
+struct OCL4DNNSoftmaxConfig
+{
+    OCL4DNNSoftmaxConfig() : axis(0), channels(0)
+    {}
+    MatShape in_shape;
+    int axis;
+    int channels;
+};
+
+template<typename Dtype>
+class OCL4DNNSoftmax
+{
+    public:
+        explicit OCL4DNNSoftmax(OCL4DNNSoftmaxConfig config);
+        ~OCL4DNNSoftmax();
+        bool Forward(const UMat& bottom_data, UMat& top_data);
+
+    private:
+        int32_t softmax_axis_;
+        int32_t inner_num_;
+        int32_t outer_num_;
+        int32_t channels_;
+        int32_t count_;
+        bool use_slm_;
+        UMat scale_data_;
+};
+#endif // HAVE_OPENCL
+} // namespace ocl4dnn
+} // namespace dnn
+} // namespace cv
+#endif
diff --git a/modules/dnn/src/ocl4dnn/src/common.cpp b/modules/dnn/src/ocl4dnn/src/common.cpp
new file mode 100644 (file)
index 0000000..5a18c41
--- /dev/null
@@ -0,0 +1,57 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "../../precomp.hpp"
+#include "common.hpp"
+#include "opencl_kernels_dnn.hpp"
+
+using namespace cv;
+
+#ifdef HAVE_OPENCL
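+// Checks whether the OpenCL compiler accepts a given build option by trying to
+// build a trivial dummy kernel with it; a non-NULL program means it is supported.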
+bool clOptionSupport(cv::String option)
+{
+    cv::String errmsg;
+    ocl::Program program = ocl::Context::getDefault().getProg(ocl::dnn::dummy_oclsrc, option, errmsg);
+    return program.ptr() ? true : false;
+}
+
+#endif // HAVE_OPENCL
diff --git a/modules/dnn/src/ocl4dnn/src/math_functions.cpp b/modules/dnn/src/ocl4dnn/src/math_functions.cpp
new file mode 100644 (file)
index 0000000..42b3557
--- /dev/null
@@ -0,0 +1,538 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "../../precomp.hpp"
+#include "common.hpp"
+#include "math_functions.hpp"
+#include <vector>
+#include "opencl_kernels_dnn.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+namespace ocl4dnn
+{
+
+#ifdef HAVE_OPENCL
+// Create an image from a buffer and copy the data into it, for GEMM's
+// matrices A and B. If the input image is NULL, a new image is created and
+// returned to the caller; otherwise the given image is used directly. It is
+// the caller's responsibility to release the created image.
+template<typename Dtype>
+ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
+                                          bool is_matrix_a, bool transpose,
+                                          bool padding, int padded_height,
+                                          int padded_width, int height,
+                                          int width, int ld)
+{
+    ocl::Context ctx = ocl::Context::getDefault();
+    ocl::Queue queue = ocl::Queue::getDefault();
+    ocl::Image2D image;
+
+    if (!is_matrix_a && transpose)
+    {
+        if (ld == width)
+        {
+            image = ocl::Image2D(buffer);
+        } else {
+            // Matrix B with transpose needs to be handled differently: as we
+            // can't easily use the sub-group block read to fetch a row, we
+            // have to use the CL_FLOAT type with read_imagef instead.
+            UMat mat(height, width, CV_32FC1);
+            image = ocl::Image2D(mat);
+
+            ocl::Kernel oclk_gemm_copy("gemm_buffer_copy_image_transpose_float", ocl::dnn::gemm_image_oclsrc);
+
+            size_t global_copy[2];
+            global_copy[0] = width;
+            global_copy[1] = height;
+            oclk_gemm_copy.set(0, ocl::KernelArg::PtrReadOnly(buffer));
+            oclk_gemm_copy.set(1, image);
+            oclk_gemm_copy.set(2, offset);
+            oclk_gemm_copy.set(3, width);
+            oclk_gemm_copy.set(4, height);
+            oclk_gemm_copy.set(5, ld);
+            oclk_gemm_copy.run(2, global_copy, NULL, false);
+        }
+    } else {
+        if (!padding)
+        {
+            // copy without padding.
+            image = ocl::Image2D(buffer);
+        } else {
+            UMat mat(padded_height, padded_width, CV_8UC4);
+            image = ocl::Image2D(mat);
+
+            ocl::Kernel oclk_gemm_copy("gemm_buffer_copy_image_no_transpose_float",
+                                       ocl::dnn::gemm_image_oclsrc);
+
+            size_t global_copy[2];
+            global_copy[0] = padded_width;
+            global_copy[1] = padded_height;
+
+            oclk_gemm_copy.set(0, ocl::KernelArg::PtrReadOnly(buffer));
+            oclk_gemm_copy.set(1, image);
+            oclk_gemm_copy.set(2, offset);
+            oclk_gemm_copy.set(3, width);
+            oclk_gemm_copy.set(4, height);
+            oclk_gemm_copy.set(5, ld);
+
+            oclk_gemm_copy.run(2, global_copy, NULL, false);
+        }
+    }
+
+    return image;
+}
+
+template
+ocl::Image2D ocl4dnnGEMMCopyBufferToImage<float>(UMat buffer, int offset,
+                                                 bool is_matrix_a, bool transpose,
+                                                 bool padding, int padded_height,
+                                                 int padded_width, int height,
+                                                 int width,  int ld);
+
+enum gemm_type_t
+{
+    GEMM_TYPE_NONE = 0,
+    GEMM_TYPE_FAST_IMAGE_32_1,
+    GEMM_TYPE_FAST_IMAGE_32_2,
+    GEMM_TYPE_FAST_IMAGE_B_IMAGE,
+    GEMM_TYPE_MAX
+};
+
+template<typename Dtype>
+static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
+                                 const CBLAS_TRANSPOSE TransB, const int32_t M,
+                                 const int32_t N, const int32_t K, const Dtype alpha,
+                                 const UMat A, const int32_t offA, const UMat B,
+                                 const int32_t offB, const Dtype beta, UMat C,
+                                 const int32_t offC, bool is_image_a, bool is_image_b,
+                                 enum gemm_type_t gemm_type,
+                                 const size_t max_image_size)
+{
+    CHECK_EQ(gemm_type == GEMM_TYPE_FAST_IMAGE_32_1 || gemm_type == GEMM_TYPE_FAST_IMAGE_32_2 ||
+             gemm_type == GEMM_TYPE_FAST_IMAGE_B_IMAGE, true) << "Invalid fast image gemm type." << std::endl;
+
+    if (is_image_a)
+    {
+        CHECK_EQ(offA, 0) << "Invalid input image offset." << std::endl;
+        return false;
+    }
+
+    if (is_image_b)
+    {
+        CHECK_EQ(offB, 0) << "Invalid input image offset." << std::endl;
+        return false;
+    }
+
+    int widthA = (TransA == CblasNoTrans) ? K : M;
+    int heightA = (TransA == CblasNoTrans) ? M : K;
+    int widthB = (TransB == CblasNoTrans) ? N : K;
+    int heightB = (TransB == CblasNoTrans) ? K : N;
+
+    int ldA = widthA;
+    int ldB = widthB;
+    int ldC = N;
+
+    int A_start_x = 0, A_start_y = 0, B_start_x = 0;
+    int B_start_y = 0, C_start_x = 0, C_start_y = 0;
+    int blocksize = 1024;
+    if (gemm_type == GEMM_TYPE_FAST_IMAGE_B_IMAGE)
+        blocksize = max_image_size;
+    int blockA_width = blocksize;
+    int blockA_height = blocksize;
+    int blockB_width = blocksize;
+    int blockB_height = blocksize;
+    int blockC_width = blocksize;
+    int blockC_height = blocksize;
+
+    int use_buffer_indicator = 8;
+    // To fix the edge problem caused by the sub-group block read,
+    // we have to pad the image if its size is not a multiple of the tile.
+    // Padding a single line is enough, as the sub-group block read
+    // clamps to the edge according to the spec.
+
+    ocl::Context ctx = ocl::Context::getDefault();
+    ocl::Queue queue = ocl::Queue::getDefault();
+
+    ocl::Image2D ImA;
+    ocl::Image2D ImB;
+
+    std::string kernel_name("gemm_");
+    if (gemm_type == GEMM_TYPE_FAST_IMAGE_32_1 || gemm_type == GEMM_TYPE_FAST_IMAGE_B_IMAGE)
+        kernel_name += "32_1_";
+    else
+        kernel_name += "32_2_";
+
+    if (TransA == CblasNoTrans)
+        kernel_name += "N";
+    else
+        kernel_name += "T";
+
+    if (TransB == CblasNoTrans)
+    {
+        kernel_name += "N_";
+    } else {
+        kernel_name += "T_";
+        if (is_image_b || (K % use_buffer_indicator != 0))
+        {
+            kernel_name += "SCALAR_";
+        } else {
+            kernel_name += "BUFFER_";
+        }
+    }
+
+    if (alpha == 1)
+        kernel_name += "1_";
+    else
+        kernel_name += "0_";
+
+    if (beta == 0)
+        kernel_name += "0";
+    else
+        kernel_name += "1";
+
+    kernel_name += "_float";
+
+    ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_image_oclsrc);
+    if (oclk_gemm_float.empty())
+        return false;
+
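+    // C is processed in blocksize x blocksize tiles, marching over K in
+    // blocksize chunks; isFirstColBlock tells the kernel whether this is the
+    // first K-chunk for the current C tile (presumably so it knows whether to
+    // initialize C or accumulate into it).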
+    while (C_start_y < M)
+    {
+        blockC_width = std::min(static_cast<int>(N) - C_start_x, blocksize);
+        blockC_height = std::min(static_cast<int>(M) - C_start_y, blocksize);
+
+        int isFirstColBlock = 1;
+        for (int k = 0; k < K; k += blocksize)
+        {
+            blockA_width = std::min(widthA - A_start_x, blocksize);
+            blockA_height = std::min(heightA - A_start_y, blocksize);
+            blockB_width = std::min(widthB - B_start_x, blocksize);
+            blockB_height = std::min(heightB - B_start_y, blocksize);
+            int block_Ksize = std::min(static_cast<int>(K) - k, blocksize);
+
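+            // Round the K-extent of the block up to a multiple of 8, as
+            // required by the sub-group block reads.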
+            int padded_k = block_Ksize + ((block_Ksize & 7) ? (8 - (block_Ksize & 7)) : 0);
+            int imageA_w = (TransA == CblasNoTrans) ? padded_k : blockA_width;
+            int imageA_h = (TransA == CblasNoTrans) ? blockA_height : padded_k;
+            int imageB_w = (TransB == CblasNoTrans) ? blockB_width : padded_k;
+            int imageB_h = (TransB == CblasNoTrans) ? padded_k : blockB_height;
+
+            int blockA_offset = offA + A_start_y * ldA + A_start_x;
+            int blockB_offset = offB + B_start_y * ldB + B_start_x;
+            int blockC_offset = offC + C_start_y * ldC + C_start_x;
+            if (TransB == CblasNoTrans)
+            {
+                bool padding_A = false;
+                bool padding_B = false;
+
+                if (!is_image_a && !is_image_b)
+                {
+                    if (M * K < N * K)
+                        padding_B = true;
+                    else
+                        padding_A = true;
+                }
+
+                if (!is_image_a)
+                {
+                    ImA = ocl4dnnGEMMCopyBufferToImage<Dtype>(A, blockA_offset,
+                                                              true, TransA != CblasNoTrans,
+                                                              padding_A, imageA_h, imageA_w,
+                                                              blockA_height, blockA_width, ldA);
+                }
+                if (!is_image_b)
+                {
+                    ImB = ocl4dnnGEMMCopyBufferToImage<Dtype>(B, blockB_offset,
+                                                              false, false,
+                                                              padding_B, imageB_h, imageB_w,
+                                                              blockB_height, blockB_width, ldB);
+                }
+            } else {
+                // We use a normal read_imagef to read image B when B is
+                // transposed, so we don't need to pad image A at all.
+                if (!is_image_a)
+                {
+                    bool padding;
+                    padding = !is_image_b;
+                    ImA = ocl4dnnGEMMCopyBufferToImage<Dtype>(A, blockA_offset,
+                                                              true, TransA != CblasNoTrans,
+                                                              padding, imageA_h, imageA_w,
+                                                              blockA_height, blockA_width, ldA);
+                }
+
+                if (!is_image_b && (K % use_buffer_indicator != 0))
+                {
+                    ImB = ocl4dnnGEMMCopyBufferToImage<Dtype>(B, blockB_offset,
+                                                              false, true, false, imageB_h, imageB_w,
+                                                              blockB_height, blockB_width, ldB);
+                }
+            }
+
+            size_t global[2];
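+            // Launch 8x1 work-groups: X is rounded up to a multiple of 8,
+            // and each strip along Y covers 32 rows of C.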
+            if (gemm_type == GEMM_TYPE_FAST_IMAGE_32_1 || gemm_type == GEMM_TYPE_FAST_IMAGE_B_IMAGE)
+            {
+                global[0] = (size_t)( blockC_width + 7 ) & ~7;
+            } else {
+                global[0] = (size_t)( (blockC_width / 2 ) + 7 ) & ~7;  // round up to a multiple of 8
+            }
+            global[1] = (size_t)(blockC_height + 31) / 32;
+
+            size_t local[2];
+            local[0] = 8;
+            local[1] = 1;
+
+            cl_uint arg_idx = 0;
+            if (is_image_a)
+                oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(A));
+            else
+                oclk_gemm_float.set(arg_idx++, ImA);
+
+            if (TransB == CblasNoTrans || is_image_b || (K % use_buffer_indicator != 0))
+            {
+                if (is_image_b)
+                    oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(B));
+                else
+                    oclk_gemm_float.set(arg_idx++, ImB);
+            } else {
+                oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(B));
+                oclk_gemm_float.set(arg_idx++, blockB_offset);
+                oclk_gemm_float.set(arg_idx++, ldB);
+            }
+            oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrWriteOnly(C));
+            oclk_gemm_float.set(arg_idx++, blockC_offset);
+            oclk_gemm_float.set(arg_idx++, blockC_height);
+            oclk_gemm_float.set(arg_idx++, blockC_width);
+            oclk_gemm_float.set(arg_idx++, ldC);
+            oclk_gemm_float.set(arg_idx++, alpha);
+            oclk_gemm_float.set(arg_idx++, beta);
+            oclk_gemm_float.set(arg_idx++, padded_k);
+            if (TransB != CblasNoTrans)
+                oclk_gemm_float.set(arg_idx++, block_Ksize);
+            oclk_gemm_float.set(arg_idx++, isFirstColBlock);
+
+            if (!oclk_gemm_float.run(2, global, local, false))
+                return false;
+
+            if (TransA == CblasNoTrans)
+                A_start_x += blockA_width;
+            else
+                A_start_y += blockA_height;
+
+            if (TransB == CblasNoTrans)
+                B_start_y += blockB_height;
+            else
+                B_start_x += blockB_width;
+
+            isFirstColBlock = 0;
+        }
+
+        C_start_x += blockC_width;
+        if (TransA == CblasNoTrans)
+            A_start_x = 0;
+        else
+            A_start_y = 0;
+        if (TransB == CblasNoTrans)
+        {
+            B_start_x += blockB_width;
+            B_start_y = 0;
+        } else {
+            B_start_y += blockB_height;
+            B_start_x = 0;
+        }
+        if (C_start_x >= N)
+        {
+            C_start_x = 0;
+            B_start_x = 0;
+            B_start_y = 0;
+            C_start_y += blockC_height;
+            if (TransA == CblasNoTrans)
+                A_start_y += blockA_height;
+            else
+                A_start_x += blockA_width;
+        }
+    }
+
+    return true;
+}
+
+template<typename Dtype>
+bool ocl4dnnGEMMCommon(const CBLAS_TRANSPOSE TransB,
+                       const int32_t M, const int32_t N, const int32_t K,
+                       const UMat A, const UMat B,
+                       const UMat B_image, UMat C,
+                       const size_t max_image_size)
+{
+    gemm_type_t gemm_type = GEMM_TYPE_FAST_IMAGE_32_1;
+
+    if (gemm_type == GEMM_TYPE_FAST_IMAGE_32_1 ||
+        gemm_type == GEMM_TYPE_FAST_IMAGE_32_2)
+    {
+        return ocl4dnnFastImageGEMM<Dtype>(CblasNoTrans, TransB, M, N, K,
+                                           (Dtype)1., A, 0, B, 0, (Dtype)0., C,
+                                           0, false, false, gemm_type, max_image_size);
+    }
+    else if (gemm_type == GEMM_TYPE_FAST_IMAGE_B_IMAGE)
+    {
+        return ocl4dnnFastImageGEMM<Dtype>(CblasNoTrans, TransB, M, N, K,
+                                           (Dtype)1., A, 0, B_image, 0, (Dtype)0., C,
+                                           0, false, true,
+                                           GEMM_TYPE_FAST_IMAGE_B_IMAGE,
+                                           max_image_size);
+    }
+    return false;
+}
+
+template bool ocl4dnnGEMMCommon<float>(const CBLAS_TRANSPOSE TransB,
+                                       const int32_t M, const int32_t N, const int32_t K,
+                                       const UMat A, const UMat B,
+                                       const UMat B_image, UMat C,
+                                       const size_t max_image_size);
+
+template<typename Dtype>
+bool ocl4dnnGEMV(const CBLAS_TRANSPOSE TransA,
+                 const int32_t M, const int32_t N, const Dtype alpha,
+                 const UMat A, const int32_t offA, const UMat x,
+                 const int32_t offx, const Dtype beta, UMat y,
+                 const int32_t offy)
+{
+    return false;
+}
+
+template<>
+bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
+                 const int32_t M, const int32_t N, const float alpha,
+                 const UMat A, const int32_t offA, const UMat x,
+                 const int32_t offx, const float beta, UMat y,
+                 const int32_t offy)
+{
+    ocl::Queue queue = ocl::Queue::getDefault();
+    bool ret = false;
+
+    if (TransA == CblasNoTrans)
+    {
+        ocl::Kernel k(CL_KERNEL_SELECT("matvec_mul4"), cv::ocl::dnn::matvec_mul_oclsrc);
+        if (k.empty())
+            return false;
+
+        uint row_size = M;
+        uint col_size = N;
+        size_t localsize[] = { 128 };
+        size_t globalsize[] = { row_size / 4 * localsize[0] };
+
+        uint argId = 0;
+        k.set(argId++, ocl::KernelArg::PtrReadOnly(A));
+        k.set(argId++, offA);
+        k.set(argId++, cl_uint(col_size));
+        k.set(argId++, cl_uint(col_size%4));
+        k.set(argId++, ocl::KernelArg::PtrReadOnly(x));
+        k.set(argId++, offx);
+        k.set(argId++, alpha);
+        k.set(argId++, beta);
+        k.set(argId++, ocl::KernelArg::PtrWriteOnly(y));
+        k.set(argId++, offy);
+        k.set(argId++, NULL, localsize[0] * sizeof(cl_float4));
+
+        ret = k.run(1, globalsize, localsize, false);
+
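+        // matvec_mul4 handles rows in groups of four; process the remaining
+        // 1-3 rows with the scalar matvec_mul1 kernel.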
+        if ((row_size % 4) != 0 && ret)
+        {
+            ocl::Kernel k_1(CL_KERNEL_SELECT("matvec_mul1"), cv::ocl::dnn::matvec_mul_oclsrc);
+            size_t localsize[] = { 128 };
+            size_t globalsize[] = { row_size % 4 * localsize[0] };
+            uint row_offset = row_size - (row_size % 4);
+
+            uint argId = 0;
+            k_1.set(argId++, ocl::KernelArg::PtrReadOnly(A));
+            k_1.set(argId++, offA);
+            k_1.set(argId++, cl_uint(col_size));
+            k_1.set(argId++, cl_uint(row_offset));
+            k_1.set(argId++, cl_uint(col_size%4));
+            k_1.set(argId++, ocl::KernelArg::PtrReadOnly(x));
+            k_1.set(argId++, offx);
+            k_1.set(argId++, alpha);
+            k_1.set(argId++, beta);
+            k_1.set(argId++, ocl::KernelArg::PtrWriteOnly(y));
+            k_1.set(argId++, offy);
+            k_1.set(argId++, NULL, localsize[0] * sizeof(cl_float));
+
+            ret = k_1.run(1, globalsize, localsize, false);
+        }
+    }
+    return ret;
+}
+
+template<typename Dtype>
+bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
+                 const UMat X, const int32_t offX, UMat Y,
+                 const int32_t offY)
+{
+    ocl::Context ctx = ocl::Context::getDefault();
+
+    ocl::Kernel oclk_axpy(CL_KERNEL_SELECT("axpy"), cv::ocl::dnn::math_oclsrc);
+    if (oclk_axpy.empty())
+        return false;
+
+    size_t global[] = { 128 * 128 };
+    size_t local[] = { 128 };
+
+    cl_uint argIdx = 0;
+    oclk_axpy.set(argIdx++, N);
+    oclk_axpy.set(argIdx++, alpha);
+    oclk_axpy.set(argIdx++, ocl::KernelArg::PtrReadOnly(X));
+    oclk_axpy.set(argIdx++, offX);
+    oclk_axpy.set(argIdx++, ocl::KernelArg::PtrWriteOnly(Y));
+    oclk_axpy.set(argIdx++, offY);
+
+    return oclk_axpy.run(1, global, local, false);
+}
+
+template bool ocl4dnnAXPY<float>(const int32_t N, const float alpha,
+                                 const UMat X, const int32_t offX,
+                                 UMat Y, const int32_t offY);
+
+#endif  // HAVE_OPENCL
+
+} // namespace ocl4dnn
+} // namespace dnn
+} // namespace cv
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
new file mode 100644 (file)
index 0000000..13d5afb
--- /dev/null
@@ -0,0 +1,1568 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "../../precomp.hpp"
+
+#include <opencv2/core/utils/configuration.private.hpp>
+
+#include <string>
+#include <vector>
+#include <fstream>
+#include <sys/stat.h>
+#include <assert.h>
+#include "common.hpp"
+#include "ocl4dnn.hpp"
+#include "opencl_kernels_dnn.hpp"
+#include "math_functions.hpp"
+#include "default_kernel_config.hpp"
+
+#if defined WIN32 || defined _WIN32
+#include <windows.h>
+#include <direct.h>
+#endif
+
+#ifdef HAVE_OPENCL
+namespace cv { namespace dnn { namespace ocl4dnn {
+static cv::Mutex kernelConfigMutex;
+typedef std::map<std::string, std::string> kernel_hash_t;
+static kernel_hash_t kernelConfigMap;
+static bool defaultConfigLoaded = false;
+
+template<typename Dtype>
+OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
+{
+    bias_term_ = config.bias_term;
+    int dims = config.in_shape.size();
+    int spatial_dims = 2;
+
+    channels_   = config.in_shape[dims - spatial_dims - 1];
+    num_output_ = config.out_shape[dims - spatial_dims - 1];
+    group_ = config.group;
+
+    prev_kernel_type_ = -1;
+    tuned_ = false;
+
+    // assumption: spatial dimension is 2.
+    kernel_h_ = config.kernel.height;
+    kernel_w_ = config.kernel.width;
+    pad_h_ = config.pad.height;
+    pad_w_ = config.pad.width;
+    stride_h_ = config.stride.height;
+    stride_w_ = config.stride.width;
+    dilation_h_ = config.dilation.height;
+    dilation_w_ = config.dilation.width;
+    M_ = num_output_ / group_;
+    height_ = config.in_shape[dims - spatial_dims + 0];
+    width_ = config.in_shape[dims - spatial_dims + 1];
+    output_h_ = config.out_shape[dims - spatial_dims + 0];
+    output_w_ = config.out_shape[dims - spatial_dims + 1];
+    bottom_dim_ = channels_ * width_ * height_;
+    top_dim_ = num_output_ * output_w_ * output_h_;
+
+    cache_path_ = utils::getConfigurationParameterString("OPENCV_OCL4DNN_CONFIG_PATH", "");
+
+    use_cache_path_ = false;
+    if (!cache_path_.empty())
+    {
+#if defined _WIN32
+        struct _stat file_stat;
+        use_cache_path_ = _stat(cache_path_.c_str(), &file_stat) == 0 &&
+                      ((_S_IFDIR & file_stat.st_mode) != 0);
+#else
+        struct stat file_stat;
+        use_cache_path_ = stat(cache_path_.c_str(), &file_stat) == 0 &&
+                      S_ISDIR(file_stat.st_mode);
+#endif
+        if (!use_cache_path_)
+        {
+            static int warn_ = 0;
+            if (!warn_)
+            {
+                std::cerr
+                    << "OpenCV(ocl4dnn): Kernel configuration cache directory doesn't exist: " << cache_path_ << std::endl
+                    << std::endl;
+                warn_ = true;
+            }
+        }
+    }
+
+    force_auto_tuning_ =
+            (use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false))
+            || utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
+}
+
+template<typename Dtype>
+OCL4DNNConvSpatial<Dtype>::~OCL4DNNConvSpatial()
+{
+    if (!swizzled_weights_umat.empty()) {
+        swizzled_weights_umat.release();
+    }
+}
+
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
+{
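+    // The OpenCL kernel sources are written in terms of the generic "Dtype"
+    // macros defined here; this implementation instantiates them for
+    // single-precision float only.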
+    addDef("Dtype", "float");
+    addDef("Dtype2", "float2");
+    addDef("Dtype4", "float4");
+    addDef("Dtype8", "float8");
+    addDef("Dtype16", "float16");
+    addDef("as_Dtype", "as_float");
+    addDef("as_Dtype2", "as_float2");
+    addDef("as_Dtype4", "as_float4");
+    addDef("as_Dtype8", "as_float8");
+    addDef("Dtype_ID", (int)CV_32F);
+    addDef("Dtype_SIZE", (int)sizeof(Dtype));
+}
+
+typedef enum {
+    KERNEL_TYPE_INTEL_IDLF = 2,
+    KERNEL_TYPE_BASIC = 4,
+    KERNEL_TYPE_GEMM_LIKE = 5
+} ocl4dnnConvSpatialKernelType_t;
+
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
+                                                   int32_t blockM,
+                                                   int32_t blockK,
+                                                   int32_t blockN)
+{
+    std::string kernelUKey;
+    int32_t simd_size;
+
+    if (kernelType == KERNEL_TYPE_INTEL_IDLF) {
+        simd_size = blockN;
+        kernelUKey = generateSpecificKey(KERNEL_TYPE_INTEL_IDLF, blockM, blockK, 1);
+
+        // kernel name
+        kernel_name_ = "IDLF_";
+        kernel_name_ += kernelUKey;
+        if (simd_size == 16)
+            kernel_name_ += "_SIMD16";
+        else
+            kernel_name_ += "_SIMD8";
+
+        // options
+        options_ << " -cl-fast-relaxed-math -D KERNEL_IDLF -D convolve_simd=" << kernel_name_;
+        if (clOptionSupport("-cl-no-subgroup-ifp"))
+            options_ << " -cl-no-subgroup-ifp ";
+
+        // defs
+        int32_t output_width = output_w_;
+        int32_t output_height = output_h_;
+        int32_t output_block_width = blockM;
+        int32_t output_block_height = blockK;
+        const int32_t last_block_width = (output_width % output_block_width == 0) ?
+                                        output_block_width : output_width % output_block_width;
+        const int32_t last_block_height = (output_height % output_block_height == 0) ?
+                                         output_block_height : output_height % output_block_height;
+        int tile_x = alignSize((output_block_width - 1) * stride_w_ + kernel_w_ * dilation_w_, 4);
+        int tile_y = (output_block_height - 1) * stride_h_ + kernel_h_ * dilation_h_;
+        int tile_y_stride = (4 * simd_size) / tile_x;
+        int invec_size = divUp(tile_y, tile_y_stride);
+
+        addDef("SIMD_SIZE", simd_size);
+        addDef("filter_qualifier", "__global");
+        addDef("OUT_BLOCK_WIDTH", output_block_width);
+        addDef("OUT_BLOCK_HEIGHT", output_block_height);
+        addDef("LAST_BLOCK_WIDTH", last_block_width);
+        addDef("LAST_BLOCK_HEIGHT", last_block_height);
+        addDef("INPUT_DEPTH", channels_ / group_);
+        addDef("TOTAL_INPUT_DEPTH_SIZE", channels_);
+        addDef("TOTAL_OUTPUT_DEPTH", num_output_);
+        addDef("INPUT_START_X", 0);
+        addDef("INPUT_START_Y", 0);
+        addDef("INPUT_START_Z", 0);
+        addDef("NUM_FILTERS", M_);
+        addDef("OUT_BUFF_OFFSET", 0);
+        addDef("TILE_X", tile_x);
+        addDef("TILE_Y", tile_y);
+        addDef("TILE_Y_STRIDE", tile_y_stride);
+        addDef("INVEC_SIZE", invec_size);
+        addDef("ALIGNED_NUM_FILTERS", (int)alignSize(M_, simd_size));
+        addDef("OUT_BLOCK_SIZE", (output_block_width*output_block_height));
+        addDef("APPLY_BIAS", bias_term_);
+
+        src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
+    }
+    else if (kernelType == KERNEL_TYPE_BASIC)
+    {
+        addDef("KERNEL_BASIC");
+
+        kernelUKey = generateSpecificKey(KERNEL_TYPE_BASIC, blockM, blockK, blockN);
+        kernel_name_ = "BASIC_";
+        kernel_name_ += kernelUKey;
+
+        // opts
+        options_ << " -cl-fast-relaxed-math -D ConvolveBasic=" << kernel_name_;
+        if (clOptionSupport("-cl-no-subgroup-ifp"))
+            options_ << " -cl-no-subgroup-ifp ";
+
+        // defs
+        addDef("CHANNELS", channels_ / group_);
+        addDef("APPLY_BIAS", bias_term_);
+        addDef("OUTPUT_Z", M_);
+        addDef("ZPAR", 1);
+
+        src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
+    }
+    else if (kernelType == KERNEL_TYPE_GEMM_LIKE)
+    {
+        simd_size = blockK;
+        kernelUKey = generateSpecificKey(KERNEL_TYPE_GEMM_LIKE, blockM, blockK, blockN);
+
+        kernel_name_ = "U_GEMM_LIKE_CONV_";
+        kernel_name_ += kernelUKey;
+        kernel_name_ += (blockK == 8) ? "_SIMD8" : "_SIMD16";
+        std::stringstream kernelDef;
+        kernelDef << "GEMM_LIKE_CONV_" << blockN << "_" << blockM;
+        if (blockK == 16)
+            kernelDef << "_SIMD16";
+
+        // Build list of options and defines
+        options_ << " -cl-fast-relaxed-math " << " -D " << kernelDef.str()
+            << " -D Conv_Interleaved=" << kernel_name_.c_str();
+        options_ << " -cl-mad-enable";
+        if (clOptionSupport("-cl-no-subgroup-ifp"))
+            options_ << " -cl-no-subgroup-ifp ";
+
+        addDef("INPUT_DEPTH", channels_);
+        addDef("WIDTH1", M_);
+        addDef("OUT_PADDING_LEFT", 0);
+        addDef("OUT_PADDING_HEIGHT", 0);
+        addDef("OUT_DEPTH", M_);
+        addDef("NUM_BATCHES", num_);
+        addDef("DY", blockM);
+        addDef("DX", blockN);
+        addDef("KERNEL_WIDTH_DIV2", kernel_w_ / 2);
+        addDef("KERNEL_SLICE_DIV2", (kernel_w_ * kernel_h_) / 2);
+        addDef("TILE_N_LAST", M_ % 32);
+        addDef("TILE_N_LAST_DIV8", (M_ % 32) / 8);
+        addDef("APPLY_BIAS", bias_term_);
+        src_ = ocl::dnn::conv_layer_spatial_oclsrc;
+    }
+}
+
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setupKernel()
+{
+    collectCommonInformation();
+
+    addDef("KERNEL_WIDTH", kernel_w_);
+    addDef("KERNEL_HEIGHT" , kernel_h_);
+    addDef("STRIDE_X", stride_w_);
+    addDef("STRIDE_Y", stride_h_);
+    addDef("DILATION_X", dilation_w_);
+    addDef("DILATION_Y", dilation_h_);
+    if (kernelType_ != KERNEL_TYPE_BASIC)
+    {
+        addDef("INPUT_PAD_W", pad_w_);
+        addDef("INPUT_PAD_H", pad_h_);
+    }
+
+    setupKernelDetails(kernelType_, blockM_, blockK_, blockN_);
+}
+
+template<typename Dtype>
+bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
+                                        const UMat& weight,
+                                        const UMat& bias,
+                                        UMat& top,
+                                        int32_t numImages)
+{
+    num_ = numImages;
+
+    prepareKernel(bottom, top, weight, bias, numImages);
+    return convolve(bottom, top, weight, bias, numImages, bestKernelConfig, cv::ocl::Queue::getDefault());
+}
+
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::calculateBenchmark(const UMat &bottom, UMat &verifyTop,
+                                                   const UMat &weight, const UMat &bias,
+                                                   int32_t numImages)
+{
+    options_.str(""); options_.clear(); // clear contents and state flags
+    createBasicKernel(1, 1, 1);
+    kernel_index_ = kernelQueue.size() - 1;
+    convolve(bottom, verifyTop, weight, bias, numImages, kernelQueue[kernel_index_], cv::ocl::Queue::getDefault());
+    CV_Assert(phash.find(kernelQueue[kernel_index_]->kernelName) != phash.end());
+    //unloadProgram(kernelQueue[kernel_index_]->kernelName);
+    kernelQueue.pop_back();
+    return;
+}
+
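+// Debug tracing: 'dbg' is defined here, so dbgPrint(x) evaluates its
+// argument (typically a stream or printf statement); undefine it to compile
+// the tracing away.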
+#define dbg
+#ifdef dbg
+#define dbgPrint(x) (x)
+#else
+#define dbgPrint(x)
+#endif
+
+// For a large enough input size, we do not need to tune kernels for
+// different sizes: with a large input there are enough work items to feed
+// all the EUs.
+// FIXME: for the GEMM-like convolution, switch back to the exact image size.
+
+#define TUNING_SIZE(x) ((x) > 256 ? 256 : (alignSize(x, 16)))
+
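+// Build the tuning-cache key from the convolution geometry plus the device
+// vendor and EU count; key_sanitized_ keeps only [0-9A-Za-z_] characters so
+// it can serve as a file name in the on-disk config cache.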
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::generateKey()
+{
+    std::stringstream keyBuilder;
+    // FIXME: to support fuse?
+    keyBuilder << "k" << kernel_w_ << "x" << kernel_h_ << "_"
+               << "cn" << channels_ << "_"
+               << "g" << group_ << "_"
+               << "s" << stride_w_ << "x" << stride_h_ << "_"
+               << "d" << dilation_w_ << "x" << dilation_h_ << "_"
+               << "b" << bias_term_ << "_"
+               << "in" << TUNING_SIZE(width_) << "x" << TUNING_SIZE(height_) << "_"
+               << "p" << pad_w_ << "x" << pad_h_ << "_"
+               << "num" << num_ << "_"
+               << "M" << M_;
+
+    key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
+    key_sanitized_ = key_;
+    for (size_t i = 0; i < key_sanitized_.size(); i++)
+    {
+        char c = key_sanitized_[i];
+        if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
+        {
+            key_sanitized_[i] = '_';
+        }
+    }
+    // TODO add hash?
+    // key_sanitized_ = key_sanitized_ + cv::format("_%08llx", crc64((uchar*)key_.c_str(), key_.size()));
+    short_key_ = keyBuilder.str();
+}
+
+template<typename Dtype>
+std::string OCL4DNNConvSpatial<Dtype>::generateSpecificKey(int32_t type, int32_t blockWidth,
+                                                           int32_t blockHeight, int32_t blockDepth)
+{
+    std::stringstream keyBuilder;
+    keyBuilder << short_key_
+               << "_" << type
+               << "_" << blockWidth
+               << "_" << blockHeight
+               << "_" << blockDepth;
+    return keyBuilder.str();
+}
+
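+// Reorder a row-major r x c matrix into the layout the GEMM-like kernels
+// expect: 'interleavedRows' rows are emitted in pairs, alternating
+// blockWidth-sized chunks of row y and row y+1; the remaining
+// 'nonInterleavedRows' rows are copied in rowAlignment-sized chunks, each
+// source row stretched over twice its width in the destination.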
+template<typename Dtype>
+void interleaveMatrix(Dtype* mem_dst, const Dtype *mem,
+                      int r, int c, int interleavedRows, int nonInterleavedRows,
+                      int blockWidth, int rowAlignment )
+{
+    CHECK_EQ(interleavedRows % 2, 0) <<
+             "interleaveMatrix only supports even values for interleavedRows.";
+
+    size_t memSize = r * c * sizeof(float);
+    size_t dstSize = memSize *
+                     (interleavedRows + nonInterleavedRows * 2) /
+                     (interleavedRows + nonInterleavedRows);
+    memset(mem_dst, 0, dstSize);    // NOLINT
+
+    const int xStride = blockWidth;
+    const int yStride = c * 2;
+    const Dtype *pSrc = mem;
+    Dtype* pDst = mem_dst;
+    for (int y = 0; y < r;) {
+        for (int rows = 0; rows < interleavedRows; rows += 2) {
+            if ( y >= r ) break;
+            if ((c % xStride) == 0) {
+                for (int x = 0; x < c / xStride; x++) {
+                    memcpy(pDst + x * xStride * 2,                         // NOLINT
+                           pSrc + x * xStride,     xStride * sizeof(Dtype));
+                    memcpy(pDst + x * xStride * 2 + xStride,               // NOLINT
+                           pSrc + x * xStride + c, xStride * sizeof(Dtype));
+                }
+            } else {
+                const int count = c / xStride;
+                int x = 0;
+                for (; x < count - 1; x++) {
+                    memcpy(pDst + x * xStride * 2,                          // NOLINT
+                           pSrc + x * xStride, xStride * sizeof(Dtype));
+                    memcpy(pDst + x * xStride * 2 + xStride,                // NOLINT
+                           pSrc + x * xStride + c, xStride * sizeof(Dtype));
+                }
+                memcpy(pDst + x * xStride * 2,                            // NOLINT
+                       pSrc + x * xStride, xStride * sizeof(Dtype));
+            }
+            pSrc += yStride;
+            pDst += yStride;
+            y += 2;
+        }
+
+        for (int rows = 0; rows < nonInterleavedRows; rows++) {
+            if (y >= r) break;
+            const int stride = rowAlignment;
+            int remaining = c;
+            for (int x = 0; x < c; x += stride) {
+                if (remaining >= stride) {
+                    memcpy(pDst + x * 2, pSrc + x, stride * sizeof(Dtype));    // NOLINT
+                    remaining -= stride;
+                } else {
+                    memcpy(pDst + x * 2, pSrc + x, remaining * sizeof(Dtype));  // NOLINT
+                }
+            }
+            pSrc += yStride / 2;
+            pDst += yStride;
+            y++;
+        }
+    }
+}
+
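+// Reorder weights for the selected kernel: on the GPU via the
+// copyWeightsSwizzled helper kernel (interleave == false, IDLF case), or on
+// the CPU with a transpose followed by interleaveMatrix() (interleave ==
+// true, GEMM-like case).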
+template<typename Dtype>
+bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
+                                              int32_t swizzled_factor,
+                                              bool interleave)
+{
+    // Simply skip the weight swizzle if we already have swizzled_weights_umat
+    // from the test phase and are not auto-tuning.
+    // This requires that we always call convolve again with the winning
+    // configuration during the auto-tuning stage.
+    if (tuned_ && !swizzled_weights_umat.empty())
+        return true;
+
+    if (swizzled_weights_umat.empty())
+        swizzled_weights_umat.create(1, (int)alignSize(num_output_, 16) * channels_ *
+                                     kernel_h_ * (int)alignSize(kernel_w_, 2), CV_32FC1);
+
+    ocl::Queue queue = ocl::Queue::getDefault();
+    if (!interleave) {
+        cl_uint argIdx = 0;
+        int32_t channels = channels_ / group_;
+
+        ocl::Kernel oclk_copy_weight(CL_KERNEL_SELECT("copyWeightsSwizzled"),
+                                     cv::ocl::dnn::conv_spatial_helper_oclsrc);
+        if (oclk_copy_weight.empty())
+            return false;
+
+        oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
+        oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrWriteOnly(swizzled_weights_umat));
+        oclk_copy_weight.set(argIdx++, kernel_w_);
+        oclk_copy_weight.set(argIdx++, kernel_h_);
+        oclk_copy_weight.set(argIdx++, channels);
+        oclk_copy_weight.set(argIdx++, num_output_);
+        oclk_copy_weight.set(argIdx++, swizzled_factor);
+
+        size_t global_work_size_copy[3] = {
+            (size_t) (alignSize(num_output_, swizzled_factor) * channels * kernel_w_ * kernel_h_), 1, 1 };
+
+        if (!oclk_copy_weight.run(3, global_work_size_copy, NULL, false))
+        {
+            std::cout << "Swizzle kernel run failed." << std::endl;
+            return false;
+        }
+    } else {
+        // assumption: the kernel is 2-dimensional
+        Mat weightMat = weight.getMat(ACCESS_READ);
+        Dtype* cpu_weight = (Dtype *)weightMat.ptr<float>();
+        Mat swizzledWeightMat = swizzled_weights_umat.getMat(ACCESS_WRITE);
+        Dtype* cpu_swizzled_weight = (Dtype *)swizzledWeightMat.ptr<float>();
+
+        int interleavedRows = (kernel_w_ / 2) * 2;
+        int nonInterleavedRows = kernel_w_ % 2;
+        int blockWidth = swizzled_factor;  // should equal the SIMD size.
+        int rowAlignment = 32;
+        size_t interleaved_filter_size = M_ * kernel_w_ * kernel_h_ * channels_ * sizeof(Dtype);
+        Dtype * tmpSwizzledWeight = reinterpret_cast<Dtype*>(malloc(interleaved_filter_size));
+        CHECK_EQ(tmpSwizzledWeight != NULL, true) << "Failed to allocate temporary swizzled weight";
+        for (int od = 0; od < M_; od++)
+            for (int id = 0; id < channels_; id++)
+                for (int r = 0; r < kernel_h_; r++)
+                    for (int c = 0; c < kernel_w_; c++)
+                        tmpSwizzledWeight[((id * kernel_h_ + r)* kernel_w_ + c) * M_ + od] =
+                            cpu_weight[((od * channels_ + id) * kernel_h_ + r)*kernel_w_+c];
+        interleaveMatrix(cpu_swizzled_weight,
+                         tmpSwizzledWeight,
+                         kernel_w_ * kernel_h_ * channels_, M_,
+                         interleavedRows,
+                         nonInterleavedRows,
+                         blockWidth,
+                         rowAlignment);
+        free(tmpSwizzledWeight);
+    }
+    return true;
+}
+
+template<>
+bool OCL4DNNConvSpatial<float>::createBasicKernel(int32_t blockWidth,
+                                                  int32_t blockHeight, int32_t blockDepth)
+{
+    kernelType_ = KERNEL_TYPE_BASIC;
+    blockM_ = blockWidth;
+    blockK_ = blockHeight;
+    blockN_ = blockDepth;
+    setupKernel();
+
+    ocl::Program program = compileKernel();
+    if (program.ptr())
+    {
+        int32_t workItemOutput[3] = { 1, 1, 1 };
+        size_t globalSize[3] = { (size_t)output_w_, (size_t)output_h_, (size_t)M_ };
+        kernelQueue.push_back(makePtr<kernelConfig>(kernel_name_, &globalSize[0], (const size_t*)NULL, &workItemOutput[0],
+                                                    false, KERNEL_TYPE_BASIC));
+        return true;
+    }
+    else
+        return false;
+}
+
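+// Wrap the float range [offset, offset + size) of an existing OpenCL buffer
+// as a UMat via clCreateSubBuffer, so per-group offsets can be handed to
+// kernels that only accept base pointers.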
+template<>
+void OCL4DNNConvSpatial<float>::CreateSubBuffer(const UMat& buffer, UMat& sub_buffer,
+                                                int32_t offset, int32_t size, bool write_only)
+{
+    cl_mem sub_mem;
+    cl_buffer_region region;
+    cl_int err;
+
+    region.origin = offset * sizeof(float);
+    region.size = size * sizeof(float);
+    sub_mem = clCreateSubBuffer((cl_mem)buffer.handle(ACCESS_READ),
+                                write_only ? CL_MEM_WRITE_ONLY : CL_MEM_READ_ONLY,
+                                CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
+    if (err)
+    {
+        std::cout << "Failed to create sub buffer." << std::endl;
+        return;
+    }
+
+    int step = sizeof(float), rows = size, cols = 1;
+    ocl::convertFromBuffer(sub_mem, step, rows, cols, CV_32FC1, sub_buffer);
+
+    //decrease ocl mem refcount
+    clReleaseMemObject(sub_mem);
+}
+
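+// Run the convolution with the given kernel configuration. IDLF and
+// GEMM-like kernels get one enqueue per group with sub-buffer offsets; the
+// basic kernel loops over images and groups and passes offsets as arguments.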
+template<>
+bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
+                                         const UMat &weight, const UMat &bias,
+                                         int32_t numImages, kernelConfig* config,
+                                         const cv::ocl::Queue& queue)
+{
+    ocl::Program program;
+    phash_t::iterator it = phash.find(config->kernelName);
+    if (it != phash.end())
+        program = it->second;
+    else
+        return false;
+
+    int32_t bias_offset;
+
+    if (config->kernelType == KERNEL_TYPE_INTEL_IDLF) {
+        if (!swizzleWeight(weight, config->workItem_output[2], false))
+            return false;
+        size_t total_bottom_size = bottom_dim_ * numImages;
+        size_t total_kernel_size = kernel_h_ * kernel_w_ * channels_ * M_;
+        size_t total_bias_size = M_ * group_;
+        size_t total_top_size = top_dim_ * numImages;
+        for (int32_t g = 0; g < group_; ++g) {
+            bias_offset = M_ * g;
+            int32_t image_offset = width_ * height_ * (channels_ / group_) * g;
+            int32_t output_image_offset = output_w_ * output_h_ * M_ * g;
+            int32_t kernel_offset = kernel_h_ * kernel_w_ * (channels_ / group_) * M_ * g;
+
+            ocl::Kernel kernel(config->kernelName.c_str(), program);
+            if (kernel.empty())
+                return false;
+
+            cl_uint argIdx = 0;
+
+            UMat img_buffer;
+            if (image_offset)
+            {
+                CreateSubBuffer(bottom, img_buffer, image_offset,
+                                total_bottom_size - image_offset, false);
+                if (img_buffer.empty())
+                    return false;
+
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(img_buffer));
+            }
+            else
+            {
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+            }
+
+            UMat kernel_buffer;
+            if (kernel_offset)
+            {
+                CreateSubBuffer(swizzled_weights_umat, kernel_buffer, kernel_offset,
+                                total_kernel_size - kernel_offset, false);
+                if (kernel_buffer.empty())
+                    return false;
+
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(kernel_buffer));
+            }
+            else
+            {
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(swizzled_weights_umat));
+            }
+
+            UMat bias_buffer;
+            if (bias_term_)
+            {
+                if (bias_offset)
+                {
+                    CreateSubBuffer(bias, bias_buffer, bias_offset,
+                                    total_bias_size - bias_offset, false);
+                    if (bias_buffer.empty())
+                        return false;
+
+                    kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias_buffer));
+                }
+                else
+                {
+                    kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias));
+                }
+            }
+
+            UMat out_buffer;
+            if (output_image_offset)
+            {
+                CreateSubBuffer(top, out_buffer, output_image_offset,
+                                total_top_size - output_image_offset, true);
+                if (out_buffer.empty())
+                    return false;
+
+                kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(out_buffer));
+            }
+            else
+            {
+                kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+            }
+
+            kernel.set(argIdx++, (uint16_t)width_);
+            kernel.set(argIdx++, (uint16_t)height_);
+            kernel.set(argIdx++, (uint16_t)output_w_);
+            kernel.set(argIdx++, (uint16_t)output_h_);
+            if (!kernel.run(3, config->global_work_size, config->local_work_size, false))
+            {
+                std::cout << "IDLF kernel run failed." << std::endl;
+                return false;
+            }
+        }
+    } else if (config->kernelType == KERNEL_TYPE_GEMM_LIKE) {
+        if (!swizzleWeight(weight, config->workItem_output[1], true))
+            return false;
+        size_t total_bottom_size = bottom_dim_ * numImages;
+        size_t total_kernel_size = kernel_h_ * kernel_w_ * channels_ * M_;
+        size_t total_bias_size = M_ * group_;
+        size_t total_top_size = top_dim_ * numImages;
+        for (int32_t g = 0; g < group_; ++g) {
+            bias_offset = M_ * g;
+            int32_t image_offset = width_ * height_ * (channels_ / group_) * g;
+            int32_t output_image_offset = output_w_ * output_h_ * M_ * g;
+            int32_t kernel_offset = kernel_h_ * kernel_w_ * (channels_ / group_) * M_ * g;
+
+            ocl::Kernel kernel(config->kernelName.c_str(), program);
+            if (kernel.empty())
+                return false;
+
+            cl_uint argIdx = 0;
+
+            UMat img_buffer;
+            if (image_offset)
+            {
+                CreateSubBuffer(bottom, img_buffer, image_offset,
+                                total_bottom_size - image_offset, false);
+                if (img_buffer.empty())
+                    return false;
+
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(img_buffer));
+            }
+            else
+            {
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+            }
+
+            UMat kernel_buffer;
+            if (kernel_offset)
+            {
+                CreateSubBuffer(swizzled_weights_umat, kernel_buffer, kernel_offset,
+                                total_kernel_size - kernel_offset, false);
+                if (kernel_buffer.empty())
+                    return false;
+
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(kernel_buffer));
+            }
+            else
+            {
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(swizzled_weights_umat));
+            }
+
+            UMat bias_buffer;
+            if (bias_term_)
+            {
+                if (bias_offset)
+                {
+                    CreateSubBuffer(bias, bias_buffer, bias_offset,
+                                    total_bias_size - bias_offset, false);
+                    if (bias_buffer.empty())
+                        return false;
+
+                    kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias_buffer));
+                }
+                else
+                {
+                    kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias));
+                }
+            }
+
+            UMat out_buffer;
+            if (output_image_offset)
+            {
+                CreateSubBuffer(top, out_buffer, output_image_offset,
+                                total_top_size - output_image_offset, true);
+                if (out_buffer.empty())
+                    return false;
+
+                kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(out_buffer));
+            }
+            else
+            {
+                kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+            }
+
+            kernel.set(argIdx++, (uint16_t)width_);
+            kernel.set(argIdx++, (uint16_t)height_);
+            kernel.set(argIdx++, (uint16_t)output_w_);
+            kernel.set(argIdx++, (uint16_t)output_h_);
+
+            int out_pitch_y = output_w_ * output_h_;
+            int out_pitch_z = out_pitch_y * M_;
+            int aligned_input_size = height_ * width_ * channels_ / group_;
+            int slice_pitch = width_ * height_;
+            kernel.set(argIdx++, (uint32_t)out_pitch_y);
+            kernel.set(argIdx++, (uint32_t)out_pitch_z);
+            kernel.set(argIdx++, (uint32_t)aligned_input_size);
+            kernel.set(argIdx++, (uint32_t)slice_pitch);
+
+            int blockM = config->workItem_output[0];
+            int blockK = config->workItem_output[1];
+            int blockN = config->workItem_output[2];
+            int alignedFilterWidth = alignSize(M_, blockN);
+            int alignedExpandHeight = alignSize(output_w_ * output_h_, blockM);
+            int globalWorkSizeDX = blockN;
+            int globalWorkSizeDY = blockM;
+            size_t sgemm_m = alignedExpandHeight;
+            size_t sgemm_n = alignedFilterWidth;
+            size_t gx = divUp(sgemm_n, globalWorkSizeDX);
+            size_t gy = divUp(sgemm_m, globalWorkSizeDY);
+            gy = alignSize(gy, blockK);
+            size_t global_size[3] = { gx, gy, config->global_work_size[2] };
+
+            if (!kernel.run(3, global_size, config->local_work_size, false))
+            {
+                std::cout << "GEMM like kernel run failed." << std::endl;
+                return false;
+            }
+        }
+    } else {
+        for (int32_t n = 0; n < numImages; ++n) {
+            for (int32_t g = 0; g < group_; ++g) {
+                bias_offset = M_ * g;
+                int32_t image_offset = n * bottom_dim_
+                    + width_ * height_ * (channels_ / group_) * g;
+                int32_t output_image_offset = n * top_dim_
+                    + output_w_ * output_h_ * M_ * g;
+
+                cl_uint argIdx = 0;
+                int32_t kernel_offset = kernel_h_ * kernel_w_ * (channels_ / group_) * M_ * g;
+
+                ocl::Kernel kernel(config->kernelName.c_str(), program);
+                if (kernel.empty())
+                    return false;
+
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+                kernel.set(argIdx++, image_offset);
+                kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
+                kernel.set(argIdx++, kernel_offset);
+                if (bias_term_)
+                    kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias));
+                else
+                    kernel.set(argIdx++, (void *)NULL);
+                kernel.set(argIdx++, bias_offset);
+                kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+                kernel.set(argIdx++, output_image_offset);
+                kernel.set(argIdx++, (uint16_t)width_);
+                kernel.set(argIdx++, (uint16_t)height_);
+                kernel.set(argIdx++, (uint16_t)output_w_);
+                kernel.set(argIdx++, (uint16_t)output_h_);
+                kernel.set(argIdx++, (uint16_t)pad_w_);
+                kernel.set(argIdx++, (uint16_t)pad_h_);
+                if (!kernel.run(3, config->global_work_size,
+                                (config->use_null_local) ? NULL : config->local_work_size,
+                                false))
+                {
+                    std::cout << "Basic kernel run failed." << std::endl;
+                    return false;
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
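+// Benchmark one configuration on a profiling queue: an untimed warm-up run,
+// then the average over loop_cnt timed runs. Returns a large sentinel time
+// when profiling is unavailable or the kernel fails.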
+template<>
+float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
+                                               const UMat &weight, const UMat &bias,
+                                               int32_t numImages, kernelConfig* config)
+{
+    cv::ocl::Queue profilingQueue;
+    try
+    {
+        profilingQueue = cv::ocl::Queue::getDefault().getProfilingQueue();
+    }
+    catch (const cv::Exception&)
+    {
+        static int warn_ = 0;
+        if (!warn_)
+        {
+            std::cout << "OpenCV(ocl4dnn): Can't create OpenCL profiling queue for auto-tuning." << std::endl;
+            warn_ = true;
+        }
+        return 1e6;
+    }
+
+    // warm up.
+    bool saved_tuned = tuned_;
+    tuned_ = false;
+    convolve(bottom, top, weight, bias, numImages, config, profilingQueue);
+
+    cv::ocl::Timer timer(profilingQueue);
+    timer.start();
+    bool res = true;
+    dbgPrint(std::cout << "Benchmarking kernel: " << config->kernelName << std::endl);
+    tuned_ = true;
+    int loop_cnt = 4;
+    for (int i = 0; i < loop_cnt; i++) {
+        res = convolve(bottom, top, weight, bias, numImages, config, profilingQueue);
+        if (!res)
+            break;
+    }
+    tuned_ = saved_tuned;
+    timer.stop();
+    if (!res) {
+        config->tested = true;
+        config->verified = false;
+        return 1e5;
+    }
+
+    float elapsedTime = timer.milliSeconds() / loop_cnt;
+    #ifdef dbg
+    double out_w = output_w_;
+    double out_h = output_h_;
+    double out_z = M_;
+    double k_w = kernel_w_;
+    double k_h = kernel_h_;
+    double k_z = channels_;
+    double totalFlops = ((k_w * k_h * k_z - 1) * 2) * (out_w * out_h * out_z) * num_;
+    std::cout << "\tEstimated GFLOPs: " << ((totalFlops/1000)/1000)/1000
+              << std::endl;
+    std::cout << "\tEstimated GFLOPS/s: " << (((totalFlops/1000)/1000)/1000)*(1000.0/elapsedTime)
+              << std::endl;
+    #if 0
+    std::cout << "Estimated utilization: " <<
+        ((((totalFlops/1000)/1000)/1000)*(1000.0/elapsedTime))/880.0
+        << std::endl;
+    #endif
+    #endif
+    return elapsedTime;
+}
+
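+// Check a candidate kernel's output against the reference in verifyTop,
+// allowing 10% relative error, with an absolute tolerance for near-zero
+// reference values.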
+template<>
+bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
+                                             UMat &top,
+                                             const UMat &weight,
+                                             const UMat &bias,
+                                             int32_t numImages,
+                                             kernelConfig* config,
+                                             UMat &verifyTop)
+{
+
+    uint32_t verificationFail = 0;
+
+    if (config->verified)
+        return true;
+    else if (config->tested)
+        return false;
+
+    // clear 'top' before the test run (UMat::zeros() is static and would not
+    // modify 'top' in place)
+    top.setTo(Scalar::all(0));
+    bool saved_tuned = tuned_;
+    tuned_ = false;
+    convolve(bottom, top, weight, bias, numImages, config, cv::ocl::Queue::getDefault());
+    tuned_ = saved_tuned;
+
+    Mat topMat = top.getMat(ACCESS_READ);
+    Mat verifyTopMat = verifyTop.getMat(ACCESS_READ);
+    const float *data = topMat.ptr<float>();
+    const float *verify_data = verifyTopMat.ptr<float>();
+
+    for (int32_t n = 0; n < num_; ++n) {
+        for (int32_t g = 0; g < group_; ++g) {
+            int32_t output_image_offset = n * top_dim_ + output_w_ * output_h_ * M_ * g;
+            for (int out_ch = 0; out_ch < M_ && !verificationFail; out_ch++)
+                for (int h = 0; h < output_h_ && !verificationFail; h++)
+                    for (int w = 0; w < output_w_; w++) {
+                        size_t offset = output_image_offset + out_ch * output_w_ * output_h_ + h * output_w_ + w;
+                        if (fabs(data[offset] - verify_data[offset]) > 0.1 * fabs(verify_data[offset]) &&
+                            !(fabs(verify_data[offset]) < 1.e-3 &&
+                            fabs(data[offset] - verify_data[offset]) < 1.e-4))
+                        {
+                            dbgPrint(printf("test verification failed @ image %d group %d "
+                                            "out_ch %d h %d w %d got %G expected %G\n",
+                                            n, g, out_ch, h, w, data[offset], verify_data[offset]));
+                            verificationFail = 1;
+                            goto out;
+                        }
+                    }
+        }
+    }
+out:
+    if (verificationFail == 1)
+        return false;
+    else
+        return true;
+}
+
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::unloadProgram(const std::string& kernelName)
+{
+    ocl::Program program;
+    phash_t::iterator it = phash.find(kernelName);
+    if (it != phash.end())
+    {
+        program = it->second;
+        it->second = ocl::Program();
+    }
+    else
+        return;
+
+    ocl::Context ctx = ocl::Context::getDefault();
+    ctx.unloadProg(program);
+}
+
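+// Return the program cached in phash for kernel_name_, or compile src_ with
+// the accumulated build options; the result is inserted into phash either
+// way (a failed build is cached too).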
+template<typename Dtype>
+ocl::Program OCL4DNNConvSpatial<Dtype>::compileKernel()
+{
+    phash_t::iterator it = phash.find(kernel_name_);
+    if (it != phash.end())
+    {
+        return it->second;
+    }
+
+    String errmsg;
+    ocl::Context ctx = ocl::Context::getDefault();
+    std::string options = options_.str();
+    CV_Assert(options.size() != 0);
+    ocl::Program program = ctx.getProg(src_, options, errmsg);
+
+    phash.insert(std::pair<std::string, ocl::Program>(kernel_name_, program));
+    if (!program.ptr())
+    {
+        std::cout << "Failed to compile kernel: " << kernel_name_
+                  << ", buildflags: " << options
+                  << ", errmsg: " << errmsg << std::endl;
+    }
+    return program;
+}
+
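+// Build a GEMM-like kernel candidate; blockK doubles as the SIMD width, and
+// the program is rejected if the compiler's preferred subgroup size differs
+// from it.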
+template<>
+bool OCL4DNNConvSpatial<float>::createGEMMLikeConvKernel(int32_t blockM,
+                                                         int32_t blockK,
+                                                         int32_t blockN)
+{
+    int32_t simd_size = blockK;
+
+    int workItemOutput[3] = { blockM, blockK, blockN };
+    size_t gx = (size_t)divUp(M_, blockN);
+    size_t gy = (size_t)divUp(output_w_ * output_h_, blockM);
+    gy = alignSize(gy, simd_size);
+    size_t gz = num_;
+    size_t global_size[3] = { gx, gy, gz };
+    size_t local_size[3] = { 1, static_cast<size_t>(simd_size), 1 };
+
+    kernelType_ = KERNEL_TYPE_GEMM_LIKE;
+    blockM_ = blockM;
+    blockK_ = blockK;
+    blockN_ = blockN;
+    setupKernel();
+
+    ocl::Program program = compileKernel();
+    if (program.ptr())
+    {
+        size_t workgroupSize_used;
+        ocl::Kernel kernel(kernel_name_.c_str(), program);
+        if (kernel.empty())
+            return false;
+
+        workgroupSize_used = kernel.preferedWorkGroupSizeMultiple();
+        if (workgroupSize_used != simd_size)
+        {
+            std::cerr << "OpenCV(ocl4dnn): The OpenCL compiler chose a SIMD size (" << workgroupSize_used << ") that" << std::endl;
+            std::cerr << "                 does not match the size (" << simd_size << ") the kernel source requires." << std::endl;
+            std::cerr << "                 Skipping kernel " << kernel_name_ << std::endl;
+            unloadProgram(kernel_name_);
+            return false;
+        }
+        else
+        {
+            kernelQueue.push_back(makePtr<kernelConfig>(kernel_name_, &global_size[0], &local_size[0], &workItemOutput[0],
+                                                        true, KERNEL_TYPE_GEMM_LIKE));
+            return true;
+        }
+    }
+    else
+        return false;
+}
+
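+// Build an IDLF kernel candidate for the given output block size and SIMD
+// width; as with the GEMM-like path, reject the program if the compiler's
+// preferred subgroup size differs from the requested simd_size.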
+template<>
+bool OCL4DNNConvSpatial<float>::setupIDLF(int32_t blockWidth,
+                                          int32_t blockHeight,
+                                          int32_t simd_size)
+{
+    int32_t workItemOutput[3] = { blockWidth, blockHeight, simd_size };
+    const int32_t num_output_maps = M_;
+    int32_t output_width = output_w_;
+    int32_t output_height = output_h_;
+    int32_t output_block_width = blockWidth;
+    int32_t output_block_height = blockHeight;
+    int32_t num_batches = num_;
+
+    size_t global_size[3] = {
+        (size_t)divUp(output_width, output_block_width),
+        (size_t)divUp(output_height, output_block_height),
+        (size_t)num_batches * alignSize(num_output_maps, simd_size) };
+    size_t local_size[3] = { 1, 1, static_cast<size_t>(simd_size) };
+
+    kernelType_ = KERNEL_TYPE_INTEL_IDLF;
+    blockM_ = blockWidth;
+    blockK_ = blockHeight;
+    blockN_ = simd_size;
+
+    setupKernel();
+
+    ocl::Program program = compileKernel();
+    if (program.ptr())
+    {
+        size_t workgroupSize_used;
+        ocl::Kernel kernel(kernel_name_.c_str(), program);
+        if (kernel.empty())
+            return false;
+
+        workgroupSize_used = kernel.preferedWorkGroupSizeMultiple();
+        if (workgroupSize_used != simd_size)
+        {
+            std::cerr << "OpenCV(ocl4dnn): The OpenCL compiler chose a SIMD size (" << workgroupSize_used << ") that" << std::endl;
+            std::cerr << "                 does not match the size (" << simd_size << ") the kernel source requires." << std::endl;
+            std::cerr << "                 Skipping kernel " << kernel_name_ << std::endl;
+            unloadProgram(kernel_name_);
+            return false;
+        }
+        else
+        {
+            kernelQueue.push_back(makePtr<kernelConfig>(kernel_name_, &global_size[0], &local_size[0], &workItemOutput[0],
+                                                        true, KERNEL_TYPE_INTEL_IDLF));
+            return true;
+        }
+    }
+    else
+        return false;
+}
+
+template<>
+bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
+                                                        int32_t blockWidth,
+                                                        int32_t blockHeight,
+                                                        int32_t blockDepth)
+{
+    kernelType_ = kernelType;
+    options_.str(""); options_.clear(); // clear contents and state flags
+    src_ = ocl::ProgramSource();
+
+    if (kernelType == KERNEL_TYPE_INTEL_IDLF)
+        return setupIDLF(blockWidth, blockHeight, blockDepth);
+    else if (kernelType == KERNEL_TYPE_BASIC)
+        return createBasicKernel(blockWidth, blockHeight, blockDepth);
+    else if (kernelType == KERNEL_TYPE_GEMM_LIKE)
+        return createGEMMLikeConvKernel(blockWidth, blockHeight, blockDepth);
+    else
+        CV_Assert(0 && "Internal error");
+    return false;
+}
+
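+// Enumerate candidate configurations: GEMM-like variants when the output
+// channel count allows, plus a pruned grid of IDLF output-block sizes for
+// SIMD 8 and 16. All candidates require Intel subgroup support.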
+template<>
+void OCL4DNNConvSpatial<float>::generateTunerItems(std::vector< cv::Ptr<tunerParam> > &tunerItems)
+{
+    if (ocl::Device::getDefault().intelSubgroupsSupport()) {
+        /* IDLF kernels use an Intel-specific subgroup extension,
+           which makes them Intel-only. */
+        int max_compute_units = ocl::Device::getDefault().maxComputeUnits();
+        int kernelCnt = 0;
+        if (group_ == 1 && ((M_ % 8 == 0) && (M_ % 32 != 24))) {
+            tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_GEMM_LIKE, 1, 8, 32));
+            tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_GEMM_LIKE, 2, 8, 32));
+
+            if (kernel_w_ < 4 && M_ % 32 == 0)
+                tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_GEMM_LIKE, 1, 16, 32));
+        }
+
+        for (int simd_size = 8; simd_size <= 16; simd_size += 8) {
+            if (simd_size == 8 && !((group_ == 1 || M_ % 8 == 0)))
+                continue;
+            if (simd_size == 16 && !(group_ == 1 || M_ % 16 == 0))
+                continue;
+            const int width_max = 14, height_max = 8, block_size_max = 32;
+            for (uint32_t width = width_max; width > 0; width--) {
+                int candidate = 0;
+                if (width > output_w_)
+                    continue;
+                for (uint32_t height = height_max; height > 0; height--) {
+                    if (width * height > block_size_max || height > output_h_)
+                        continue;
+                    // Tune for SIMD 8 only when the work-item count is below
+                    // the device capacity or M_ is less than 16.
+                    if (simd_size == 8 &&
+                        M_ >= 16 &&
+                        ((num_ * M_ * output_w_ * output_h_ / static_cast<float>(width * height)) >=
+                        max_compute_units * 7 * 16))
+                        continue;
+                    int actual_tile_x = kernel_w_ * dilation_w_ + (width - 1) * stride_w_;
+                    int tile_x = alignSize(actual_tile_x, 4);
+                    int tile_y = kernel_h_ * dilation_h_ + (height - 1) * stride_h_;
+                    if (tile_x > (4 * simd_size))
+                        continue;
+                    // If actual_tile_x is not a multiple of 4, we may waste some IO
+                    // bandwidth, so skip those candidates. This prunes about 75% of
+                    // the tuning candidates with only a slight impact on the final
+                    // tuning result, less than 2% in most cases.
+                    if (actual_tile_x % 4 != 0)
+                        continue;
+                    if ((width * height + divUp(tile_x * tile_y, simd_size)) > block_size_max)
+                        continue;
+                    int tile_y_stride = (4 * simd_size) / tile_x;
+
+                    if (divUp(tile_y, tile_y_stride) < 4) {
+                        tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_INTEL_IDLF, width, height, simd_size));
+                        candidate++;
+                    }
+                    if (candidate >= 4 && height == 2)
+                        break;
+                }
+                kernelCnt += candidate;
+                if (kernelCnt >= 12 && width == 2)
+                    break;
+            }
+        }
+    }
+}
+
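+// Fast path used when full auto-tuning is off: walk the candidate list (the
+// basic kernel is appended as a final fallback) and keep the first
+// configuration that passes verification, unloading the programs built
+// before it.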
+template<>
+void OCL4DNNConvSpatial<float>::useFirstAvailable(const UMat &bottom,
+                                                  UMat &top,
+                                                  const UMat &weight,
+                                                  const UMat &bias,
+                                                  int32_t numImages,
+                                                  UMat &verifyTop)
+{
+    std::vector< cv::Ptr<tunerParam> > tunerItems;
+    generateTunerItems(tunerItems);
+    tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_BASIC, 1, 1, 1));
+
+    for (int i = 0; i < tunerItems.size(); i++) {
+        if (createConvolutionKernel(tunerItems[i]->kernelType,
+                                    tunerItems[i]->blockWidth,
+                                    tunerItems[i]->blockHeight,
+                                    tunerItems[i]->blockDepth)) {
+            int kernelIdx = kernelQueue.size() - 1;
+            if (verifyResult(bottom, top, weight, bias, numImages, kernelQueue[kernelIdx], verifyTop)) {
+                bestKernelConfig = kernelQueue[kernelIdx];
+                if (bestKernelConfig->kernelType != KERNEL_TYPE_INTEL_IDLF &&
+                    bestKernelConfig->kernelType != KERNEL_TYPE_GEMM_LIKE)
+                    if (!swizzled_weights_umat.empty())
+                        swizzled_weights_umat.release();
+
+                for (int32_t j = 0; j < kernelIdx; j++) {
+                    CV_Assert(phash.find(kernelQueue[j]->kernelName) != phash.end());
+                    unloadProgram(kernelQueue[j]->kernelName);
+                }
+                kernelQueue.clear();
+                tuned_ = true;
+                break;
+            }
+        }
+    }
+}
+
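+// Record the winning configuration in the process-wide in-memory cache
+// (kernelConfigMap), keyed by the full device-qualified key_.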
+template<>
+void OCL4DNNConvSpatial<float>::cacheTunedConfig()
+{
+    if (tuned_)
+    {
+        cv::AutoLock lock(kernelConfigMutex);
+        std::stringstream outputKernel;
+        outputKernel << bestKernelConfig->workItem_output[0] << " "
+                     << bestKernelConfig->workItem_output[1] << " "
+                     << bestKernelConfig->workItem_output[2] << " "
+                     << bestKernelConfig->kernelType << " "
+                     << bestKernelConfig->local_work_size[0] << " "
+                     << bestKernelConfig->local_work_size[1] << " "
+                     << bestKernelConfig->local_work_size[2] << " "
+                     << bestKernelConfig->swizzle_weights << " "
+                     << bestKernelConfig->use_null_local << " ";
+        kernelConfigMap.insert(std::pair<std::string, std::string>(key_, outputKernel.str()));
+    }
+}
+
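+// Full auto-tuning: time every candidate, then repeatedly verify the fastest
+// untested one until a kernel passes; fall back to the basic kernel if none
+// does. Losing programs are unloaded and the winner is saved to disk.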
+template<>
+void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
+                                                 UMat &top,
+                                                 const UMat &weight,
+                                                 const UMat &bias,
+                                                 int32_t numImages,
+                                                 UMat &verifyTop)
+{
+    std::vector< cv::Ptr<tunerParam> > tunerItems;
+
+    generateTunerItems(tunerItems);
+    for (int i = 0; i < tunerItems.size(); i++)
+        createConvolutionKernel(tunerItems[i]->kernelType,
+                                tunerItems[i]->blockWidth,
+                                tunerItems[i]->blockHeight,
+                                tunerItems[i]->blockDepth);
+
+    for (int32_t x = 0; x < kernelQueue.size(); x++) {
+        kernelQueue[x]->executionTime = timedConvolve(bottom, top, weight, bias, numImages,
+                                                      kernelQueue[x]);
+        #ifdef TEST_ALL_KERNELS
+        if (kernelQueue[x]->tested == false) {
+            bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[x], verifyTop);
+            if (verified == false) {
+                dbgPrint(std::cout << "Kernel "
+                         << kernelQueue[x]->kernelName
+                         << " failed verification" << std::endl);
+                dbgPrint(std::cout << "kernelQueue[x]->workItem_output[0]: "
+                         << kernelQueue[x]->workItem_output[0] << " "
+                         << "kernelQueue[x]->workItem_output[1]: "
+                         << kernelQueue[x]->workItem_output[1] << " "
+                         << "kernelQueue[x]->workItem_output[2]: "
+                         << kernelQueue[x]->workItem_output[2] << " "
+                         << "kernelQueue[x]->kernelType: "
+                         << kernelQueue[x]->kernelType << " "
+                         << "kernelQueue[x]->global_work_size[0]: "
+                         << kernelQueue[x]->global_work_size[0] << " "
+                         << "kernelQueue[x]->global_work_size[1]: "
+                         << kernelQueue[x]->global_work_size[1] << " "
+                         << "kernelQueue[x]->global_work_size[2]: "
+                         << kernelQueue[x]->global_work_size[2] << " "
+                         << "kernelQueue[x]->local_work_size[0]: "
+                         << kernelQueue[x]->local_work_size[0] << " "
+                         << "kernelQueue[x]->local_work_size[1]: "
+                         << kernelQueue[x]->local_work_size[1] << " "
+                         << "kernelQueue[x]->local_work_size[2]: "
+                         << kernelQueue[x]->local_work_size[2] << " "
+                         << kernelQueue[x]->swizzle_weights << " "
+                         << kernelQueue[x]->use_null_local << std::endl);
+            } else {
+                dbgPrint(std::cout << "Kernel "
+                         << kernelQueue[x]->kernelName
+                         << " pass verification" << std::endl);
+            }
+        }
+        #endif
+    }
+    int32_t failures = 0;
+    bool verification = false;
+    if (kernelQueue.size()) {
+        while (failures < kernelQueue.size()) {
+            int32_t fastestKernel = -1;
+            float fastestTime = std::numeric_limits<float>::infinity();
+
+            for (int32_t x = 0; x < kernelQueue.size(); x++) {
+                if (kernelQueue[x]->executionTime < fastestTime &&
+                    kernelQueue[x]->tested == false) {
+                    fastestKernel = x;
+                    fastestTime = kernelQueue[x]->executionTime;
+                }
+            }
+            if (fastestKernel < 0) break;
+            // Test fastest kernel
+            bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[fastestKernel], verifyTop);
+            if (verified == true) {
+                kernelQueue[fastestKernel]->verified = true;
+                kernel_index_ = fastestKernel;
+                verification = true;
+                break;
+            } else {
+                kernelQueue[fastestKernel]->tested = true;
+                dbgPrint(std::cout << "Kernel " <<
+                         kernelQueue[fastestKernel]->kernelName <<
+                         " failed verification" << std::endl);
+                failures++;
+            }
+        }
+    }
+    if (verification) {
+        dbgPrint(std::cout << "Kernel <" << kernelQueue[kernel_index_]->kernelName <<
+                 "> passed verification" << std::endl);
+        dbgPrint(std::cout << "Convolution Time:" << kernelQueue[kernel_index_]->executionTime << std::endl);
+    } else {
+        dbgPrint(std::cout << "fallback to basic kernel" << std::endl);
+        options_.str(""); options_.clear(); // clear contents and state flags
+        createBasicKernel(1, 1, 1);
+        kernel_index_ = kernelQueue.size() - 1;
+    }
+    this->bestKernelConfig = kernelQueue[kernel_index_];
+
+    if (bestKernelConfig->kernelType != KERNEL_TYPE_INTEL_IDLF && bestKernelConfig->kernelType != KERNEL_TYPE_GEMM_LIKE)
+        if (!swizzled_weights_umat.empty())
+            swizzled_weights_umat.release();
+
+    for (int32_t x = 0; x < kernelQueue.size(); x++) {
+        if (x != kernel_index_) {
+            CV_Assert(phash.find(kernelQueue[x]->kernelName) != phash.end());
+            unloadProgram(kernelQueue[x]->kernelName);
+        }
+    }
+    kernelQueue.clear();
+    tuned_ = true;
+    saveTunedConfig();
+}
+
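+// Persist the winning configuration to <cache_path_>/<key_sanitized_> as a
+// single line of space-separated fields, in the same format written by
+// cacheTunedConfig().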
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::saveTunedConfig()
+{
+    CV_Assert(tuned_);
+    if (!use_cache_path_ || cache_path_.empty())
+        return;
+
+    std::string outputFile;
+    outputFile = cache_path_ + "/" + key_sanitized_;
+    std::ofstream outputKernel;
+    outputKernel.open(outputFile.c_str());
+    outputKernel << bestKernelConfig->workItem_output[0] << " "
+                 << bestKernelConfig->workItem_output[1] << " "
+                 << bestKernelConfig->workItem_output[2] << " "
+                 << bestKernelConfig->kernelType << " "
+                 << bestKernelConfig->local_work_size[0] << " "
+                 << bestKernelConfig->local_work_size[1] << " "
+                 << bestKernelConfig->local_work_size[2] << " "
+                 << bestKernelConfig->swizzle_weights << " "
+                 << bestKernelConfig->use_null_local << " ";
+    outputKernel.close();
+}
+
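+// Regenerate the tuning key for the current shapes. If it changed, try the
+// in-memory cache, then the on-disk cache, and only run the
+// benchmark/tuning machinery when both miss.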
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
+                                              const UMat &weight, const UMat &bias,
+                                              int32_t numImages)
+{
+    std::string previous_key = key_;
+
+    generateKey();
+    if (key_.compare(previous_key) == 0 && bestKernelConfig != NULL)
+        return;
+
+    if (bestKernelConfig)
+    {
+        prev_kernel_type_ = bestKernelConfig->kernelType;
+        CV_Assert(phash.find(bestKernelConfig->kernelName) != phash.end());
+        phash.erase(bestKernelConfig->kernelName);
+        bestKernelConfig.release();
+    }
+
+    if (loadCachedConfig()) // check in-memory cache
+        return;
+    if (loadTunedConfig()) // check external storage
+        return;
+
+    UMat benchData(1, numImages * top_dim_, CV_32FC1);
+    if (force_auto_tuning_)
+    {
+        calculateBenchmark(bottom, benchData, weight, bias, numImages);
+        setupConvolution(bottom, top, weight, bias, numImages, benchData);
+    }
+    else
+    {
+        calculateBenchmark(bottom, benchData, weight, bias, numImages);
+        useFirstAvailable(bottom, top, weight, bias, numImages, benchData);
+    }
+    cacheTunedConfig();
+}
+
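+// In-memory cache lookup; on first use, seed kernelConfigMap with the
+// built-in default configurations for Intel devices.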
+template<typename Dtype>
+bool OCL4DNNConvSpatial<Dtype>::loadCachedConfig()
+{
+    cv::AutoLock lock(kernelConfigMutex);
+    if (!defaultConfigLoaded)
+    {
+        const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2;
+        for (size_t i = 0; i < numConfigs; i++)
+        {
+            std::pair<std::string, std::string> entry(
+                    std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i],
+                    default_kernel_config_intel[2 * i + 1]);
+            kernelConfigMap.insert(entry);
+        }
+        defaultConfigLoaded = true;
+    }
+
+    kernel_hash_t::iterator it = kernelConfigMap.find(key_);
+    if (it != kernelConfigMap.end())
+    {
+        int32_t x, y, z, type, lx, ly, lz;
+        bool swizzle, nullLocal;
+        std::stringstream cachedKernel(it->second);
+        if (cachedKernel)
+        {
+            cachedKernel >> x;
+            cachedKernel >> y;
+            cachedKernel >> z;
+            cachedKernel >> type;
+            cachedKernel >> lx;
+            cachedKernel >> ly;
+            cachedKernel >> lz;
+            cachedKernel >> swizzle;
+            cachedKernel >> nullLocal;
+            if (setupKernelByConfig(x, y, z, type, lx, ly, lz, swizzle, nullLocal)) {
+                tuned_ = true;
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+
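+// Rebuild a kernel from a cached configuration tuple, then override its
+// local work size and flags with the cached values.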
+template<typename Dtype>
+bool OCL4DNNConvSpatial<Dtype>::setupKernelByConfig(int x, int y, int z, int type,
+                                                    int lx, int ly, int lz,
+                                                    bool swizzle, bool nullLocal)
+{
+    if (type == KERNEL_TYPE_INTEL_IDLF)
+    {
+        if (z == 1)
+            z = 16;
+        CHECK_EQ(z == 16 || z == 8, true) << "invalid SIMD size" << std::endl;
+    }
+    kernelQueue.clear();
+    createConvolutionKernel(type, x, y, z);
+    if (kernelQueue.size() != 1) {
+        std::cerr << "Failed to set up kernel by config:"
+            << " x = " << x
+            << " y = " << y
+            << " z = " << z
+            << " type = " << type
+            << std::endl;
+        return false;
+    }
+    bestKernelConfig = kernelQueue[0];
+    kernelQueue.clear();
+    bestKernelConfig->local_work_size[0] = lx;
+    bestKernelConfig->local_work_size[1] = ly;
+    bestKernelConfig->local_work_size[2] = lz;
+    bestKernelConfig->swizzle_weights = swizzle;
+    bestKernelConfig->use_null_local = nullLocal;
+    // If the kernel type changed to IDLF or GEMM-like, reset the swizzled
+    // weights buffer to invalidate the previously swizzled weight data.
+    if (prev_kernel_type_ != bestKernelConfig->kernelType &&
+        (bestKernelConfig->kernelType == KERNEL_TYPE_INTEL_IDLF ||
+        bestKernelConfig->kernelType == KERNEL_TYPE_GEMM_LIKE))
+    {
+        if (!swizzled_weights_umat.empty())
+            swizzled_weights_umat.release();
+    }
+    return true;
+}
+
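+// On-disk cache lookup: parse a configuration tuple from
+// <cache_path_>/<key_sanitized_> if the file exists.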
+template<typename Dtype>
+bool OCL4DNNConvSpatial<Dtype>::loadTunedConfig()
+{
+    if (!use_cache_path_)
+    {
+        if (cache_path_.empty() && !force_auto_tuning_)
+        {
+            static int warn_ = 0;
+            if (!warn_)
+            {
+                std::cout << "OpenCV(ocl4dnn): consider specifying a kernel configuration cache directory" << std::endl
+                          << "                 via the OPENCV_OCL4DNN_CONFIG_PATH parameter." << std::endl;
+                warn_ = true;
+            }
+        }
+        return false;
+    }
+
+    int32_t x, y, z, type, lx, ly, lz;
+    bool swizzle, nullLocal;
+
+    // Find cached kernel configuration from file
+    std::string cacheFile = cache_path_ + "/" + key_sanitized_;
+    std::ifstream cachedKernel(cacheFile.c_str());
+    if (cachedKernel)
+    {
+        cachedKernel >> x;
+        cachedKernel >> y;
+        cachedKernel >> z;
+        cachedKernel >> type;
+        cachedKernel >> lx;
+        cachedKernel >> ly;
+        cachedKernel >> lz;
+        cachedKernel >> swizzle;
+        cachedKernel >> nullLocal;
+        if (setupKernelByConfig(x, y, z, type, lx, ly, lz, swizzle, nullLocal)) {
+            tuned_ = true;
+            return true;
+        }
+    }
+    return false;
+}
+
+template class OCL4DNNConvSpatial<float>;
+} // namespace ocl4dnn
+}
+}
+#endif // HAVE_OPENCL
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_inner_product.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_inner_product.cpp
new file mode 100644 (file)
index 0000000..b6c1df9
--- /dev/null
@@ -0,0 +1,108 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "../../precomp.hpp"
+#include "common.hpp"
+#include "ocl4dnn.hpp"
+#include "math_functions.hpp"
+
+#ifdef HAVE_OPENCL
+namespace cv { namespace dnn { namespace ocl4dnn {
+template<typename Dtype>
+OCL4DNNInnerProduct<Dtype>::OCL4DNNInnerProduct(OCL4DNNInnerProductConfig config)
+{
+    bias_term_  = config.bias_term;
+    transpose_  = config.transpose;
+    N_ = num_output_ = config.num_output;
+    M_ = config.M;
+    K_ = config.K;
+    phase_test_ = config.phase_test;
+    image_copied_ = false;
+}
+
+template<typename Dtype>
+OCL4DNNInnerProduct<Dtype>::~OCL4DNNInnerProduct()
+{
+}
+
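+// Fully-connected forward pass: a single row (M_ == 1) uses GEMV plus an
+// optional bias AXPY; larger batches use the image-based GEMM path, which
+// requires Intel subgroups and matrices that fit within the 2D image limits.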
+template<typename Dtype>
+bool OCL4DNNInnerProduct<Dtype>::Forward(const UMat& bottom,
+                                         const UMat& weight,
+                                         const UMat& bias,
+                                         UMat& top)
+{
+    bool ret;
+
+    if (M_ == 1)
+    {
+        ret = ocl4dnnGEMV<Dtype>(CblasNoTrans, N_, K_, (Dtype) 1.,
+                                 weight, 0, bottom, 0, (Dtype) 0., top, 0);
+
+        if (bias_term_ && ret)
+            ret = ocl4dnnAXPY<Dtype>(N_, 1, bias, 0, top, 0);
+
+        return ret;
+    }
+    else
+    {
+        ret = false;
+        size_t max_image_size = std::min(ocl::Device::getDefault().image2DMaxWidth(),
+                                         ocl::Device::getDefault().image2DMaxHeight());
+        if (M_ <= max_image_size &&
+            N_ <= max_image_size &&
+            K_ <= max_image_size &&
+            cv::traits::Depth<Dtype>::value == CV_32F &&
+            ocl::Device::getDefault().intelSubgroupsSupport())
+        {
+            ret = ocl4dnnGEMMCommon<Dtype>(transpose_ ? CblasNoTrans : CblasTrans,
+                                           M_, N_, K_, bottom, weight, UMat(), top,
+                                           max_image_size);
+        }
+        return ret;
+    }
+}
+
+template class OCL4DNNInnerProduct<float>;
+} // namespace ocl4dnn
+}
+}
+#endif // HAVE_OPENCL
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp
new file mode 100644 (file)
index 0000000..6cc65b7
--- /dev/null
@@ -0,0 +1,126 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "../../precomp.hpp"
+#include "common.hpp"
+#include "ocl4dnn.hpp"
+#include "opencl_kernels_dnn.hpp"
+
+#ifdef HAVE_OPENCL
+namespace cv { namespace dnn { namespace ocl4dnn {
+template<typename Dtype>
+OCL4DNNLRN<Dtype>::OCL4DNNLRN(OCL4DNNLRNConfig config)
+{
+    lrn_type_ = config.lrn_type;
+    phase_test_ = config.phase_test;
+    size_ = config.local_size;
+    CHECK_EQ(size_ % 2, 1) << "LRN only supports odd values for local_size";
+    alpha_ = config.alpha;
+    beta_ = config.beta;
+    k_ = config.k;
+    norm_by_size_ = config.norm_by_size;
+    num_ = config.batch_size;
+    channels_ = config.channels;
+    height_ = config.height;
+    width_ = config.width;
+}
+
+template<typename Dtype>
+bool OCL4DNNLRN<Dtype>::Forward(const UMat& bottom, UMat& top)
+{
+    bool ret = true;
+
+    if (!ocl::Device::getDefault().intelSubgroupsSupport())
+        return false;
+
+    switch (lrn_type_)
+    {
+    case LRNParameter_NormRegion_ACROSS_CHANNELS:
+        ret = crossChannelForward(bottom, top);
+        break;
+    case LRNParameter_NormRegion_WITHIN_CHANNEL:
+        //TODO
+        //WithinChannelForward(bottom_data, top_data);
+        ret = false;
+        break;
+    default:
+        ret = false;
+        LOG(FATAL) << "Unknown normalization region.";
+    }
+    return ret;
+}
+
+template<typename Dtype>
+bool OCL4DNNLRN<Dtype>::crossChannelForward(const UMat& bottom, UMat& top)
+{
+    ocl::Queue queue = ocl::Queue::getDefault();
+    CHECK_EQ(phase_test_, true) << "Only forward inference is supported.";
+
+    cl_uint argIdx = 0;
+    int32_t n_threads = num_ * height_ * width_;
+    size_t global_work_size_[1] = {(size_t)n_threads};
+    String opts = clOptionSupport("-cl-no-subgroup-ifp") ? " -cl-no-subgroup-ifp " : "";
+    ocl::Kernel oclk_lrn_fill;
+    if (!oclk_lrn_fill.create(CL_KERNEL_SELECT("lrn_full_no_scale"), ocl::dnn::ocl4dnn_lrn_oclsrc, opts))
+        return false;
+
+    oclk_lrn_fill.set(argIdx++, n_threads);
+    oclk_lrn_fill.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+    oclk_lrn_fill.set(argIdx++, num_);
+    oclk_lrn_fill.set(argIdx++, channels_);
+    oclk_lrn_fill.set(argIdx++, height_);
+    oclk_lrn_fill.set(argIdx++, width_);
+    oclk_lrn_fill.set(argIdx++, size_);
+    int size_norm_factor = norm_by_size_ ? size_ : 1;
+    oclk_lrn_fill.set(argIdx++, alpha_ / size_norm_factor);
+    oclk_lrn_fill.set(argIdx++, k_);
+    oclk_lrn_fill.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+    oclk_lrn_fill.set(argIdx++, -beta_);
+
+    return oclk_lrn_fill.run(1, global_work_size_, NULL, false);
+}
+
+template class OCL4DNNLRN<float>;
+} // namespace ocl4dnn
+}
+}
+#endif // HAVE_OPENCL
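
The cross-channel kernel receives alpha_ pre-divided by the window size (when norm_by_size_ is set) and the exponent as -beta_, which corresponds to the usual LRN formula out[c] = in[c] * (k + scale * sum of in[c']^2 over the odd window centred at c)^(-beta). A single-image CPU sketch of that formula, assuming channel-major (C x H*W) layout; the function name is illustrative only:

    #include <algorithm>
    #include <cmath>
    #include <vector>

    static void lrnAcrossChannelsReference(const std::vector<float>& in,
                                           std::vector<float>& out,
                                           int channels, int spatial, // spatial = height * width
                                           int size, float alpha, float beta, float k,
                                           bool norm_by_size)
    {
        const float scale = norm_by_size ? alpha / size : alpha; // size_norm_factor above
        const int half = size / 2;
        for (int c = 0; c < channels; ++c)
            for (int s = 0; s < spatial; ++s)
            {
                float sum = 0.f; // sum of squares over the channel window
                for (int cc = std::max(0, c - half); cc <= std::min(channels - 1, c + half); ++cc)
                    sum += in[cc * spatial + s] * in[cc * spatial + s];
                out[c * spatial + s] = in[c * spatial + s] * std::pow(k + scale * sum, -beta);
            }
    }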
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
new file mode 100644 (file)
index 0000000..e0bdf71
--- /dev/null
@@ -0,0 +1,213 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "../../precomp.hpp"
+#include <string>
+#include <vector>
+#include "common.hpp"
+#include "ocl4dnn.hpp"
+#include "opencl_kernels_dnn.hpp"
+
+#ifdef HAVE_OPENCL
+namespace cv { namespace dnn { namespace ocl4dnn {
+template<typename Dtype>
+OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
+{
+    int dims = config.in_shape.size();
+    int spatial_dims = 2;
+
+    batch_size_ = config.in_shape[0];
+    channels_ = config.channels;
+    pool_method_ = config.pool_method;
+
+    for (int i = 0; i < spatial_dims; ++i)
+    {
+        kernel_shape_.push_back(i == 0 ? config.kernel.height : config.kernel.width);
+        pad_.push_back(i == 0 ? config.pad.height : config.pad.width);
+        stride_.push_back(i == 0 ? config.stride.height : config.stride.width);
+        im_in_shape_.push_back(config.in_shape[dims - spatial_dims + i]);
+        im_out_shape_.push_back(config.out_shape[dims - spatial_dims + i]);
+    }
+
+    kernel_h_ = kernel_shape_[0];
+    kernel_w_ = kernel_shape_[1];
+    stride_h_ = stride_[0];
+    stride_w_ = stride_[1];
+    pad_h_ = pad_[0];
+    pad_w_ = pad_[1];
+    height_ = im_in_shape_[0];
+    width_ = im_in_shape_[1];
+    pooled_height_ = im_out_shape_[0];
+    pooled_width_ = im_out_shape_[1];
+
+    count_ = 1;
+    for (int i = 0; i < config.out_shape.size(); ++i)
+    {
+        count_ *= config.out_shape[i];
+    }
+}
+
+template<typename Dtype>
+OCL4DNNPool<Dtype>::~OCL4DNNPool()
+{
+    mask_idx_.release();
+}
+
+template<typename Dtype>
+bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
+                                 UMat& top,
+                                 UMat& top_mask)
+{
+    bool ret = true;
+    ocl::Queue queue = ocl::Queue::getDefault();
+    size_t global[] = { 128 * 128 };
+    size_t local[] = { 128 };
+    cl_uint argIdx = 0;
+
+    // only the 2D case is supported
+    switch (pool_method_)
+    {
+    case LIBDNN_POOLING_METHOD_MAX:
+        {
+            if (top_mask.empty() && mask_idx_.empty())
+            {
+                mask_idx_.create(1, count_, CV_32FC1);
+            }
+            ocl::Kernel oclk_max_pool_forward(CL_KERNEL_SELECT("max_pool_forward"),
+                                              cv::ocl::dnn::ocl4dnn_pooling_oclsrc);
+
+            if (oclk_max_pool_forward.empty())
+                return false;
+
+            argIdx = 0;
+            oclk_max_pool_forward.set(argIdx++, count_);
+            oclk_max_pool_forward.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+            oclk_max_pool_forward.set(argIdx++, batch_size_);
+            oclk_max_pool_forward.set(argIdx++, channels_);
+            oclk_max_pool_forward.set(argIdx++, height_);
+            oclk_max_pool_forward.set(argIdx++, width_);
+            oclk_max_pool_forward.set(argIdx++, pooled_height_);
+            oclk_max_pool_forward.set(argIdx++, pooled_width_);
+            oclk_max_pool_forward.set(argIdx++, kernel_h_);
+            oclk_max_pool_forward.set(argIdx++, kernel_w_);
+            oclk_max_pool_forward.set(argIdx++, stride_h_);
+            oclk_max_pool_forward.set(argIdx++, stride_w_);
+            oclk_max_pool_forward.set(argIdx++, pad_h_);
+            oclk_max_pool_forward.set(argIdx++, pad_w_);
+            oclk_max_pool_forward.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+            oclk_max_pool_forward.set(argIdx++, mask_idx_.empty() ? 0 : 1);
+            if (mask_idx_.empty())
+                oclk_max_pool_forward.set(argIdx++, (void *)NULL);
+            else
+                oclk_max_pool_forward.set(argIdx++, ocl::KernelArg::PtrWriteOnly(mask_idx_));
+            oclk_max_pool_forward.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top_mask));
+
+            ret = oclk_max_pool_forward.run(1, global, local, false);
+        }
+        break;
+    case LIBDNN_POOLING_METHOD_AVE:
+        {
+            ocl::Kernel oclk_ave_pool_forward(CL_KERNEL_SELECT("ave_pool_forward"),
+                                              cv::ocl::dnn::ocl4dnn_pooling_oclsrc);
+
+            if (oclk_ave_pool_forward.empty())
+                return false;
+
+            argIdx = 0;
+            oclk_ave_pool_forward.set(argIdx++, count_);
+            oclk_ave_pool_forward.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+            oclk_ave_pool_forward.set(argIdx++, batch_size_);
+            oclk_ave_pool_forward.set(argIdx++, channels_);
+            oclk_ave_pool_forward.set(argIdx++, height_);
+            oclk_ave_pool_forward.set(argIdx++, width_);
+            oclk_ave_pool_forward.set(argIdx++, pooled_height_);
+            oclk_ave_pool_forward.set(argIdx++, pooled_width_);
+            oclk_ave_pool_forward.set(argIdx++, kernel_h_);
+            oclk_ave_pool_forward.set(argIdx++, kernel_w_);
+            oclk_ave_pool_forward.set(argIdx++, stride_h_);
+            oclk_ave_pool_forward.set(argIdx++, stride_w_);
+            oclk_ave_pool_forward.set(argIdx++, pad_h_);
+            oclk_ave_pool_forward.set(argIdx++, pad_w_);
+            oclk_ave_pool_forward.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+
+            ret = oclk_ave_pool_forward.run(1, global, local, false);
+        }
+        break;
+    case LIBDNN_POOLING_METHOD_STO:
+        {
+            ocl::Kernel oclk_sto_pool_forward(CL_KERNEL_SELECT("sto_pool_forward_test"),
+                                              cv::ocl::dnn::ocl4dnn_pooling_oclsrc);
+
+            if (oclk_sto_pool_forward.empty())
+                return false;
+
+            argIdx = 0;
+            oclk_sto_pool_forward.set(argIdx++, count_);
+            oclk_sto_pool_forward.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+            oclk_sto_pool_forward.set(argIdx++, batch_size_);
+            oclk_sto_pool_forward.set(argIdx++, channels_);
+            oclk_sto_pool_forward.set(argIdx++, height_);
+            oclk_sto_pool_forward.set(argIdx++, width_);
+            oclk_sto_pool_forward.set(argIdx++, pooled_height_);
+            oclk_sto_pool_forward.set(argIdx++, pooled_width_);
+            oclk_sto_pool_forward.set(argIdx++, kernel_h_);
+            oclk_sto_pool_forward.set(argIdx++, kernel_w_);
+            oclk_sto_pool_forward.set(argIdx++, stride_h_);
+            oclk_sto_pool_forward.set(argIdx++, stride_w_);
+            oclk_sto_pool_forward.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+
+            ret = oclk_sto_pool_forward.run(1, global, local, false);
+        }
+        break;
+    default:
+        {
+            ret = false;
+            LOG(FATAL) << "Unknown pooling method.";
+        }
+    }
+    return ret;
+}
+
+template class OCL4DNNPool<float>;
+} // namespace ocl4dnn
+}
+}
+#endif // HAVE_OPENCL
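
All three pooling kernels share the same window geometry: window (ph, pw) starts at (ph * stride - pad) and is clipped to the valid input range. A single-channel CPU sketch of the max-pooling case, assuming row-major layout (the function name is illustrative only):

    #include <algorithm>
    #include <cfloat>
    #include <vector>

    static void maxPool2DReference(const std::vector<float>& bottom, std::vector<float>& top,
                                   int height, int width, int pooled_h, int pooled_w,
                                   int kernel_h, int kernel_w,
                                   int stride_h, int stride_w, int pad_h, int pad_w)
    {
        for (int ph = 0; ph < pooled_h; ++ph)
            for (int pw = 0; pw < pooled_w; ++pw)
            {
                // window start/end, clipped to the input
                int hstart = std::max(ph * stride_h - pad_h, 0);
                int wstart = std::max(pw * stride_w - pad_w, 0);
                int hend = std::min(ph * stride_h - pad_h + kernel_h, height);
                int wend = std::min(pw * stride_w - pad_w + kernel_w, width);
                float m = -FLT_MAX;
                for (int h = hstart; h < hend; ++h)
                    for (int w = wstart; w < wend; ++w)
                        m = std::max(m, bottom[h * width + w]);
                top[ph * pooled_w + pw] = m;
            }
    }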
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp
new file mode 100644 (file)
index 0000000..e4802d2
--- /dev/null
@@ -0,0 +1,135 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "../../precomp.hpp"
+#include <vector>
+#include "common.hpp"
+#include "ocl4dnn.hpp"
+#include "opencl_kernels_dnn.hpp"
+
+#ifdef HAVE_OPENCL
+namespace cv { namespace dnn { namespace ocl4dnn {
+template<typename Dtype>
+OCL4DNNSoftmax<Dtype>::OCL4DNNSoftmax(OCL4DNNSoftmaxConfig config)
+{
+    softmax_axis_ = config.axis;
+    channels_ = config.channels;
+
+    inner_num_ = 1;
+    outer_num_ = 1;
+    count_ = 1;
+    int32_t scale_sz = 1;
+    for (int32_t i = softmax_axis_ + 1; i < config.in_shape.size(); i++)
+        inner_num_ *= config.in_shape[i];
+    use_slm_ = (config.in_shape[softmax_axis_] * inner_num_ + inner_num_ * 17) <= 8192;
+    for (int32_t i = 0; i < softmax_axis_; i++)
+        outer_num_ *= config.in_shape[i];
+    count_ = inner_num_ + outer_num_;
+
+    std::vector<int32_t> scale_dims = config.in_shape;
+    scale_dims[softmax_axis_] = use_slm_ ? 1 : 17;
+    for (int32_t i = 0; i < scale_dims.size(); i++)
+        scale_sz *= scale_dims[i];
+
+    scale_data_.create(1, scale_sz, CV_32FC1);
+}
+
+template<typename Dtype>
+OCL4DNNSoftmax<Dtype>::~OCL4DNNSoftmax()
+{
+    scale_data_.release();
+}
+
+template<typename Dtype>
+bool OCL4DNNSoftmax<Dtype>::Forward(const UMat& bottom, UMat& top)
+{
+    bool ret = false;
+    ocl::Queue queue = ocl::Queue::getDefault();
+    bool intel_subgroup = ocl::Device::getDefault().intelSubgroupsSupport();
+    if (intel_subgroup && inner_num_ < 128)
+    {
+        String opts = clOptionSupport("-cl-no-subgroup-ifp") ? " -cl-no-subgroup-ifp " : "";
+        String kname;
+        ocl::Kernel oclk_softmax_forward_kernel;
+
+        if (use_slm_)
+            kname = CL_KERNEL_SELECT("softmax_forward_slm");
+        else
+            kname = CL_KERNEL_SELECT("softmax_forward");
+
+        if (!oclk_softmax_forward_kernel.create(kname.c_str(), ocl::dnn::softmax_loss_oclsrc, opts))
+            return false;
+
+        size_t global_size[] = { 256, (size_t)outer_num_, 1 };
+        size_t local_size[] = { 256, 1, 1 };
+        cl_uint argIdx = 0;
+
+        if (use_slm_)
+        {
+            oclk_softmax_forward_kernel.set(argIdx++, outer_num_);
+            oclk_softmax_forward_kernel.set(argIdx++, channels_);
+            oclk_softmax_forward_kernel.set(argIdx++, inner_num_);
+            oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(scale_data_));
+            oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+            oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+            oclk_softmax_forward_kernel.set(argIdx++, NULL, channels_ * inner_num_* sizeof(Dtype));
+            oclk_softmax_forward_kernel.set(argIdx++, NULL, inner_num_* sizeof(Dtype));
+            oclk_softmax_forward_kernel.set(argIdx++, NULL, 16 * inner_num_* sizeof(Dtype));
+        }
+        else
+        {
+            oclk_softmax_forward_kernel.set(argIdx++, outer_num_);
+            oclk_softmax_forward_kernel.set(argIdx++, channels_);
+            oclk_softmax_forward_kernel.set(argIdx++, inner_num_);
+            oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(scale_data_));
+            oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
+            oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
+        }
+        ret = oclk_softmax_forward_kernel.run(3, global_size, local_size, false);
+    }
+    return ret;
+}
+
+template class OCL4DNNSoftmax<float>;
+} // namespace ocl4dnn
+}
+}
+#endif // HAVE_OPENCL
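
The inner_num_/outer_num_ split flattens the shape around the softmax axis: element (o, c, i) lives at (o * channels + c) * inner_num + i, and the reduction runs over c only. A CPU sketch of softmax over the same indexing, using the standard max-subtraction for numerical stability (the function name is illustrative only):

    #include <algorithm>
    #include <cfloat>
    #include <cmath>
    #include <vector>

    static void softmaxReference(const std::vector<float>& bottom, std::vector<float>& top,
                                 int outer_num, int channels, int inner_num)
    {
        for (int o = 0; o < outer_num; ++o)
            for (int i = 0; i < inner_num; ++i)
            {
                // element c of this (o, i) fiber sits at (o * channels + c) * inner_num + i
                float maxv = -FLT_MAX;
                for (int c = 0; c < channels; ++c)
                    maxv = std::max(maxv, bottom[(o * channels + c) * inner_num + i]);
                float sum = 0.f;
                for (int c = 0; c < channels; ++c)
                    sum += std::exp(bottom[(o * channels + c) * inner_num + i] - maxv);
                for (int c = 0; c < channels; ++c)
                    top[(o * channels + c) * inner_num + i] =
                        std::exp(bottom[(o * channels + c) * inner_num + i] - maxv) / sum;
            }
    }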
diff --git a/modules/dnn/src/opencl/relu.cl b/modules/dnn/src/opencl/relu.cl
index b98e52f..0649f2e 100644 (file)
@@ -1,3 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
 __kernel void ReLUForward(const int count, __global const T* in, __global T* out
 #ifndef RELU_NO_SLOPE
 , T negative_slope
diff --git a/modules/dnn/src/opencl/batchnorm.cl b/modules/dnn/src/opencl/batchnorm.cl
new file mode 100644 (file)
index 0000000..3f9401c
--- /dev/null
@@ -0,0 +1,26 @@
+
+__kernel void batchnorm(__global const T *src, int src_offset,
+                        __global const float *meanMat,
+                        float varMeanScale,
+                        __global const float *invStdMat,
+                        __global const float *weight,
+                        __global const float *bias,
+                        int hasWeight, int hasBias,
+                        int width, int height, int channel,
+                        __global T *dst, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    int c = get_global_id(2);
+
+    if (x >= width || y >= height || c >= channel)
+        return;
+
+    float mean = meanMat[c] * varMeanScale;
+    float invstd = invStdMat[c];
+    float w = hasWeight ? weight[c] : 1;
+    float b = hasBias ? bias[c] : 0;
+    int index = y * width + x + c * width * height;
+    T val = (src[index + src_offset] - mean) * w * invstd + b;
+    dst[index + dst_offset] = val;
+}
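
Per element the kernel computes dst = (src - mean * varMeanScale) * weight * invStd + bias, with all parameters taken per channel c, at index = y * width + x + c * width * height. The same arithmetic as scalar C++ (illustrative only):

    // One batchnorm element; mean, invStd, w, b are the per-channel values.
    inline float batchnormElement(float src, float mean, float varMeanScale,
                                  float invStd, float w, float b)
    {
        return (src - mean * varMeanScale) * w * invStd + b;
    }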
diff --git a/modules/dnn/src/opencl/concat.cl b/modules/dnn/src/opencl/concat.cl
new file mode 100644 (file)
index 0000000..041e6ac
--- /dev/null
@@ -0,0 +1,60 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+__kernel void concat(const int nthreads,
+                     __global const Dtype* in_data,
+                     const int num_concats,
+                     const int concat_size,
+                     const int top_concat_axis,
+                     const int bottom_concat_axis,
+                     const int offset_concat_axis,
+                     __global Dtype* out_data) {
+
+  for (int index = get_global_id(0); index < nthreads;
+      index += get_global_size(0)) {
+    const int total_concat_size = concat_size * bottom_concat_axis;
+    const int concat_num = index / total_concat_size;
+    const int concat_index = index % total_concat_size;
+    const int top_index = concat_index
+        + (concat_num * top_concat_axis + offset_concat_axis) * concat_size;
+    out_data[top_index] = in_data[index];
+  }
+}
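
The index arithmetic maps a flat input offset to its slot in the output along the concat axis: num_concats is the product of the dimensions before the axis and concat_size the product of those after it. A host-side C++ sketch of the same mapping (names are illustrative only):

    #include <vector>

    static void concatReference(const std::vector<float>& in, std::vector<float>& out,
                                int num_concats,        // product of dims before the axis
                                int concat_size,        // product of dims after the axis
                                int top_concat_axis,    // output extent along the axis
                                int bottom_concat_axis, // this input's extent along the axis
                                int offset_concat_axis) // where this input starts in the output
    {
        const int total = concat_size * bottom_concat_axis;
        for (int index = 0; index < num_concats * total; ++index)
        {
            const int concat_num   = index / total;
            const int concat_index = index % total;
            out[concat_index
                + (concat_num * top_concat_axis + offset_concat_axis) * concat_size] = in[index];
        }
    }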
diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl
new file mode 100644 (file)
index 0000000..a7bca1d
--- /dev/null
@@ -0,0 +1,1670 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if APPLY_BIAS
+#define BIAS_KERNEL_ARG __global Dtype * biases_base,
+#else
+#define BIAS_KERNEL_ARG
+#endif
+
+#define ACTIVATION_FUNCTION(_dst_, _offset_, _data_) do { (_dst_)[(_offset_)] = (_data_);} while(0)
+
+#define __CAT(x, y) x##y
+#define CAT(x, y) __CAT(x, y)
+#define LOOP0(VAR, STMT)
+#define LOOP1(VAR, STMT) (STMT); (VAR)++;
+#define LOOP2(VAR, STMT) LOOP1(VAR, STMT); (STMT); (VAR)++;
+#define LOOP3(VAR, STMT) LOOP2(VAR, STMT); (STMT); (VAR)++;
+#define LOOP4(VAR, STMT) LOOP3(VAR, STMT); (STMT); (VAR)++;
+#define LOOP5(VAR, STMT) LOOP4(VAR, STMT); (STMT); (VAR)++;
+#define LOOP6(VAR, STMT) LOOP5(VAR, STMT); (STMT); (VAR)++;
+#define LOOP7(VAR, STMT) LOOP6(VAR, STMT); (STMT); (VAR)++;
+#define LOOP8(VAR, STMT) LOOP7(VAR, STMT); (STMT); (VAR)++;
+#define LOOP9(VAR, STMT) LOOP8(VAR, STMT); (STMT); (VAR)++;
+#define LOOP10(VAR, STMT) LOOP9(VAR, STMT); (STMT); (VAR)++;
+#define LOOP11(VAR, STMT) LOOP10(VAR, STMT); (STMT); (VAR)++;
+#define LOOP12(VAR, STMT) LOOP11(VAR, STMT); (STMT); (VAR)++;
+#define LOOP13(VAR, STMT) LOOP12(VAR, STMT); (STMT); (VAR)++;
+#define LOOP14(VAR, STMT) LOOP13(VAR, STMT); (STMT); (VAR)++;
+#define LOOP15(VAR, STMT) LOOP14(VAR, STMT); (STMT); (VAR)++;
+#define LOOP16(VAR, STMT) LOOP15(VAR, STMT); (STMT); (VAR)++;
+#define LOOP(N, VAR, STMT) CAT(LOOP, N)((VAR), (STMT))
+
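These macros unroll a fixed-trip-count loop at preprocessing time: LOOP(N, VAR, STMT) pastes STMT followed by VAR++ exactly N times (LOOP0 expands to nothing). For example:

    /* LOOP(3, i, (sum += a[i])) expands (via LOOP3) to:
       (sum += a[i]); (i)++; (sum += a[i]); (i)++; (sum += a[i]); (i)++;
       i.e. a fully unrolled loop whose trip count is fixed at compile time. */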
+#if defined(convolve_simd) || defined(Conv_Interleaved)
+#if Dtype_SIZE == 4
+#define INT_TYPE uint
+#define INT_TYPE2 uint2
+#define INT_TYPE4 uint4
+#define INT_TYPE8 uint8
+#define SUB_GROUP_BLOCK_READ2 intel_sub_group_block_read2
+#define SUB_GROUP_BLOCK_READ4 intel_sub_group_block_read4
+#define SUB_GROUP_BLOCK_READ8 intel_sub_group_block_read8
+#define SUB_GROUP_BLOCK_READ intel_sub_group_block_read
+#else
+#error "Unsupported type"
+#endif
+#endif
+
+#ifdef KERNEL_BASIC
+
+__kernel void ConvolveBasic(
+    __global Dtype* image_data,
+    int image_offset,
+    __global Dtype* kernel_data,
+    int kernel_offset,
+    __global Dtype* bias,
+    const int bias_offset,
+    __global Dtype* convolved_image,
+    const int convolved_image_offset,
+    const ushort input_width,
+    const ushort input_height,
+    const ushort output_width,
+    const ushort output_height,
+    const ushort pad_w,
+    const ushort pad_h
+)
+{
+    const int outputX = get_global_id(0);
+    const int outputY = get_global_id(1);
+    const int kernelNum = get_global_id(2) * ZPAR;
+    if (outputX < output_width && outputY < output_height)
+    {
+        Dtype sum[ZPAR];
+        for (int kern = 0; kern < ZPAR; kern++)
+        {
+            sum[kern] = 0.0f;
+        }
+        const int org_y = outputY * STRIDE_Y - pad_h;
+        const int org_x = outputX * STRIDE_X - pad_w;
+        const int currentKernelOffset = kernel_offset + kernelNum*KERNEL_HEIGHT*KERNEL_WIDTH*CHANNELS;
+#if APPLY_BIAS
+        const int biasIndex = bias_offset + kernelNum;
+#endif
+        const int local_image_offset = org_y * input_width + org_x;
+        const int imageSize = input_width * input_height;
+        __global Dtype* image_dataPtr = (image_data + (image_offset + local_image_offset));
+        __global Dtype* kernel_dataPtr = (kernel_data + (currentKernelOffset));
+        for (int c = 0; c < CHANNELS; c++)
+        {
+            for (int y = 0; y < KERNEL_HEIGHT; y++)
+            {
+                for (int x = 0; x < KERNEL_WIDTH; x++)
+                {
+                    int y_ = org_y + y * DILATION_Y;
+                    int x_ = org_x + x * DILATION_X;
+                    if (!(y_ >= 0 && y_ < input_height && x_ >= 0 && x_ < input_width))
+                    {
+                        continue;
+                    }
+                    for (int kern = 0; kern < ZPAR; kern++)
+                    {
+                        sum[kern] += image_dataPtr[x * DILATION_X] * kernel_dataPtr[kern*KERNEL_HEIGHT*KERNEL_WIDTH*CHANNELS + x];
+                    }
+                }
+                image_dataPtr += input_width * DILATION_Y;
+                kernel_dataPtr += KERNEL_WIDTH;
+            }
+            image_dataPtr += imageSize - input_width*KERNEL_HEIGHT*DILATION_Y;
+        }
+
+        for (int kern = 0; kern < ZPAR; kern++)
+        {
+            if (kernelNum + kern < OUTPUT_Z)
+            {
+                int offset = convolved_image_offset + (kernelNum+kern)*output_height*output_width + outputY*output_width + outputX;
+#if APPLY_BIAS
+                ACTIVATION_FUNCTION(convolved_image, offset, sum[kern] + bias[biasIndex + kern]);
+#else
+                ACTIVATION_FUNCTION(convolved_image, offset, sum[kern]);
+#endif
+            }
+        }
+    }
+}
+
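ConvolveBasic is the direct (non-SIMD-blocked) path: each work-item accumulates, for up to ZPAR output maps, the sum over input channels and the dilated kernel window, skipping taps that fall in the zero padding. The same arithmetic for a single output map as scalar C++ (a sketch assuming NCHW layout; the function name is illustrative only):

    static void convolveBasicReference(const float* image, const float* weights, float bias,
                                       float* out, int channels,
                                       int in_h, int in_w, int out_h, int out_w,
                                       int k_h, int k_w, int stride_y, int stride_x,
                                       int pad_h, int pad_w, int dil_y, int dil_x)
    {
        for (int oy = 0; oy < out_h; ++oy)
            for (int ox = 0; ox < out_w; ++ox)
            {
                float sum = bias; // bias applied once per output element, as with APPLY_BIAS
                for (int c = 0; c < channels; ++c)
                    for (int ky = 0; ky < k_h; ++ky)
                        for (int kx = 0; kx < k_w; ++kx)
                        {
                            int y = oy * stride_y - pad_h + ky * dil_y;
                            int x = ox * stride_x - pad_w + kx * dil_x;
                            if (y < 0 || y >= in_h || x < 0 || x >= in_w)
                                continue; // tap lies in the zero padding
                            sum += image[(c * in_h + y) * in_w + x]
                                 * weights[(c * k_h + ky) * k_w + kx];
                        }
                out[oy * out_w + ox] = sum;
            }
    }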
+#elif defined KERNEL_IDLF
+
+#if TYPE == TYPE_HALF
+#define VLOAD4(_v, _p) do { (_v).s0 = *(_p); (_v).s1 = *(_p + 1); (_v).s2 = *(_p + 2); (_v).s3 = *(_p + 3); } while(0)
+#else
+#define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
+#endif
+
+// Each work-item computes an OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT region of one output map.
+// Each work-group (which will be mapped to 1 SIMD16/SIMD8 EU thread) will compute 16/8 different feature maps, but each feature map is for the same region of the input image.
+// NDRange:  (output_width+pad)/ OUT_BLOCK_WIDTH, (output_height+pad)/OUT_BLOCK_HEIGHT, NUM_FILTERS/OUT_BLOCK_DEPTH
+
+// NOTE: for beignet this reqd_work_group_size does not guarantee that SIMD16 mode will be used; the compiler could choose to use two SIMD8 threads, and if that happens the code will break.
+#ifndef __BEIGNET__
+__attribute__((reqd_work_group_size(1, 1, SIMD_SIZE)))
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
+#endif
+__kernel void
+convolve_simd(
+    __global Dtype* inputs_base,
+    filter_qualifier Dtype* weights_base,
+    BIAS_KERNEL_ARG
+    __global Dtype* outputs_base,
+    const ushort input_width,
+    const ushort input_height,
+    const ushort output_width,
+    const ushort output_height)
+{
+  __global Dtype* outputs = outputs_base;
+  __global Dtype* inputs = inputs_base;
+  filter_qualifier Dtype* weights = weights_base;
+  unsigned int oc = get_global_id(0) * OUT_BLOCK_WIDTH;  // oc = Output Column
+  unsigned int or = get_global_id(1) * OUT_BLOCK_HEIGHT;  // or = Output Row
+  unsigned int fm = get_global_id(2);  // fm = Feature Map = od = Output Depth
+  unsigned int fmg = get_group_id(2);
+  unsigned int lid = get_local_id(2);
+
+  Dtype out[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT];
+
+  int in_addr;
+
+  // find the weight address of the given neuron (lid is the index)
+  unsigned int weight_addr = (fmg % (ALIGNED_NUM_FILTERS/SIMD_SIZE)) * INPUT_DEPTH * KERNEL_WIDTH * KERNEL_HEIGHT * SIMD_SIZE + lid;
+
+  for(int i=0;i<OUT_BLOCK_SIZE;i++) {
+    out[i]=0.0f;
+  }
+
+  unsigned int num_in_batch = ( fm ) / ALIGNED_NUM_FILTERS;
+
+  unsigned int input_batch_offset = num_in_batch * input_height * input_width * TOTAL_INPUT_DEPTH_SIZE;
+
+  int curr_local_y = ( lid / ( TILE_X / 4 ) );
+  int curr_local_x = ( lid % ( TILE_X / 4 ) ) * 4;
+  int curr_y = or * STRIDE_Y + INPUT_START_Y + curr_local_y;
+  int curr_x = oc * STRIDE_X + INPUT_START_X + curr_local_x;
+#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0
+  int saved_y = curr_y;
+#endif
+  in_addr = input_batch_offset + INPUT_START_Z * input_height * input_width
+            +  (curr_y - INPUT_PAD_H) * input_width             // y tile offset
+            +   curr_x - INPUT_PAD_W;                        // x tile offset
+  union {
+    Dtype4 in_vec[INVEC_SIZE];
+    Dtype in_array[INVEC_SIZE * 4];
+  } in_buf;
+
+  for(int kd = 0; kd < INPUT_DEPTH; kd++)
+  {
+    int in_offset = in_addr;
+    int reg = 0;
+    LOOP(INVEC_SIZE, reg,
+      {
+        if (curr_local_y + reg * TILE_Y_STRIDE < TILE_Y || INVEC_SIZE * TILE_Y_STRIDE <= (TILE_Y + 2) || reg < INVEC_SIZE - 1) {
+#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0
+        if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + 3 >= INPUT_PAD_W && curr_x < input_width + INPUT_PAD_W) {
+          if (curr_x < INPUT_PAD_W) {
+            in_buf.in_vec[reg].s0 = 0;
+            if (curr_x + 1 >= INPUT_PAD_W)
+              in_buf.in_vec[reg].s1 = *(inputs + in_offset + 1);
+            else
+              in_buf.in_vec[reg].s1 = 0;
+            if (curr_x + 2 >= INPUT_PAD_W)
+              in_buf.in_vec[reg].s2 = *(inputs + in_offset + 2);
+            else
+              in_buf.in_vec[reg].s2 = 0;
+            in_buf.in_vec[reg].s3 = *(inputs + in_offset + 3);
+          } else {
+            VLOAD4(in_buf.in_vec[reg], inputs + in_offset);
+            if (curr_x + 1 >= input_width + INPUT_PAD_W)
+              in_buf.in_vec[reg].s1 = 0;
+            if (curr_x + 2 >= input_width + INPUT_PAD_W)
+              in_buf.in_vec[reg].s2 = 0;
+            if (curr_x + 3 >= input_width + INPUT_PAD_W)
+              in_buf.in_vec[reg].s3 = 0;
+          }
+        } else {
+          in_buf.in_vec[reg] = 0;
+        }
+        curr_y += TILE_Y_STRIDE;
+#else
+        VLOAD4(in_buf.in_vec[reg], inputs + in_offset);
+#endif
+        }
+        in_offset += input_width * TILE_Y_STRIDE;
+      });
+    in_addr += input_height * input_width;
+#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0
+    curr_y = saved_y;
+#endif
+
+#if KERNEL_WIDTH * KERNEL_HEIGHT != 1
+#define WEIGHT_PREF 8
+#else
+#define WEIGHT_PREF 1
+#endif
+    union {
+      Dtype w[WEIGHT_PREF];
+#if KERNEL_WIDTH * KERNEL_HEIGHT != 1
+      INT_TYPE8 ui8;
+#endif
+    } weight_buf;
+    int w_idx=0;
+
+    unsigned int orig_weight_addr = weight_addr;
+#if KERNEL_WIDTH * KERNEL_HEIGHT != 1
+    weight_buf.ui8 = SUB_GROUP_BLOCK_READ8((__global INT_TYPE *)&weights[weight_addr]);
+    weight_addr += SIMD_SIZE * WEIGHT_PREF;
+#else
+    weight_buf.w[0] = as_Dtype(SUB_GROUP_BLOCK_READ((__global INT_TYPE *)&weights[weight_addr]));
+    weight_addr += SIMD_SIZE * 1;
+#endif
+
+#define BLOCK_IN(n) sub_group_broadcast( in_buf.in_array[((n)%4) + ((n) / (TILE_Y_STRIDE * TILE_X)) * 4], (((n) % (TILE_Y_STRIDE * TILE_X))/4))
+
+    int kr = 0;  // kr = Kernel Row
+    LOOP(KERNEL_HEIGHT, kr,// LOOP is a macro that unrolls the loop.
+        {
+          int kc = 0;  // kc = Kernel Column
+          LOOP(KERNEL_WIDTH, kc,
+              {
+                for(int br=0; br < OUT_BLOCK_HEIGHT; br++) {
+                  for(int bc=0; bc < OUT_BLOCK_WIDTH; bc++) {
+                    Dtype input = BLOCK_IN((br * STRIDE_Y + kr * DILATION_Y) * TILE_X + bc * STRIDE_X + kc * DILATION_X);
+                    out[br * OUT_BLOCK_WIDTH + bc] = mad(weight_buf.w[w_idx % WEIGHT_PREF], input, out[br * OUT_BLOCK_WIDTH + bc]);
+                  }
+                }
+#if KERNEL_WIDTH * KERNEL_HEIGHT > WEIGHT_PREF
+                // We assume KERNEL_WIDTH is equal to KERNEL_HEIGHT here.
+                if ((w_idx + 1) % WEIGHT_PREF == 0
+                #if KERNEL_WIDTH * KERNEL_HEIGHT % 8 != 0
+                && ((w_idx + 1) <= (KERNEL_WIDTH * KERNEL_HEIGHT - WEIGHT_PREF))
+                #endif
+                    ) {
+                  weight_buf.ui8 = SUB_GROUP_BLOCK_READ8((__global INT_TYPE *)&weights[weight_addr]);
+                  weight_addr += SIMD_SIZE * WEIGHT_PREF;  // weights must be stored in just the right SIMD swizzled format for this to work, see host code for details.
+                }
+              #if KERNEL_WIDTH*KERNEL_HEIGHT % 8 == 0
+                // need to do nothing
+              #else
+                else if ((w_idx + 1) %  WEIGHT_PREF == 0 && ((w_idx + 1) > (KERNEL_WIDTH * KERNEL_HEIGHT - WEIGHT_PREF)))
+                #if KERNEL_WIDTH * KERNEL_HEIGHT % 8 == 1
+                  weight_buf.w[0] = weights[weight_addr];
+                #elif KERNEL_WIDTH * KERNEL_HEIGHT % 8 == 2
+                  weight_buf.ui8.s01 = SUB_GROUP_BLOCK_READ2((__global INT_TYPE *)&weights[weight_addr]);
+                #elif KERNEL_WIDTH * KERNEL_HEIGHT % 8 <= 4
+                  weight_buf.ui8.s0123 = SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)&weights[weight_addr]);
+                #else
+                  weight_buf.ui8 = SUB_GROUP_BLOCK_READ8((__global INT_TYPE *)&weights[weight_addr]);
+                #endif
+              #endif
+#endif
+                ++w_idx;
+              });
+        });
+    weight_addr = orig_weight_addr + KERNEL_WIDTH * KERNEL_HEIGHT * SIMD_SIZE;
+
+  }
+  // dead code to work around possible compiler bug.
+  if (ALIGNED_NUM_FILTERS != NUM_FILTERS && fm > 0xfffffffeul) {
+    outputs[0] = BLOCK_IN(fm % SIMD_SIZE);
+  }
+  fm = fm % ALIGNED_NUM_FILTERS;
+
+  if ((ALIGNED_NUM_FILTERS == NUM_FILTERS || fm < NUM_FILTERS)) {
+  unsigned int out_addr = OUT_BUFF_OFFSET + ( num_in_batch * TOTAL_OUTPUT_DEPTH + fm ) * output_width * output_height;
+  out_addr += or * output_width + oc;
+  // we need this address calculation for biases because we support views and batching
+#if APPLY_BIAS
+  Dtype bias = biases_base[fm];
+#else
+  Dtype bias = 0;
+#endif
+    for(unsigned int r = 0; r < OUT_BLOCK_HEIGHT; r++) {
+      if (r + or >= output_height) break;
+      for(unsigned int c = 0; c < OUT_BLOCK_WIDTH; c++) {
+        if (c + oc >= output_width) break;
+        // this does a scattered write to SIMD_SIZE different feature maps, so that data within one map is contiguous, thus ready for input to next layer.
+        ACTIVATION_FUNCTION(outputs, out_addr + r * output_width + c, bias + out[r * OUT_BLOCK_WIDTH + c]);
+
+      }
+    }
+  }
+}
+
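convolve_simd keeps the whole input tile in registers: each lane of the sub-group loads a Dtype4 strip into in_buf, and BLOCK_IN(n) fetches tile element n from whichever lane holds it via sub_group_broadcast, so no local memory is needed. A minimal OpenCL C illustration of that sharing (assuming the Intel subgroups extension):

    /* Each lane loads one value; any lane may then read another lane's register:

         float mine  = input[get_sub_group_local_id()];
         float lane3 = sub_group_broadcast(mine, 3); // the value lane 3 loaded

       BLOCK_IN(n) applies the same idea to the Dtype4 strips in in_buf, mapping
       tile coordinate n to an (array element, lane) pair as in the macro above. */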
+#else // KERNEL_GEMM_LIKE
+
+#if APPLY_BIAS
+// Dtype bias[4];
+#define SUBGROUP_GET_BIAS(k, i) intel_sub_group_shuffle(bias[k], i)
+#else
+#define SUBGROUP_GET_BIAS(k, i) ((Dtype)0)
+#endif
+
+#ifdef Conv_Interleaved
+typedef struct float1 { float s0; } float1;
+typedef struct float5 { float s0; float s1; float s2; float s3; float s4; } float5;
+typedef struct float6 { float s0; float s1; float s2; float s3; float s4; float s5; } float6;
+typedef struct float7 { float s0; float s1; float s2; float s3; float s4; float s5; float s6; } float7;
+typedef struct float9 { float s0; float s1; float s2; float s3; float s4; float s5; float s6; float s7; float s8; } float9;
+typedef struct float10 { float s0; float s1; float s2; float s3; float s4; float s5;
+                         float s6; float s7; float s8; float s9;} float10;
+typedef struct float11 { float s0; float s1; float s2; float s3; float s4; float s5;
+                         float s6; float s7; float s8; float s9; float sa;} float11;
+typedef struct float12 { float s0; float s1; float s2; float s3; float s4; float s5;
+                         float s6; float s7; float s8; float s9; float sa; float sb; } float12;
+typedef struct float13 { float s0; float s1; float s2; float s3; float s4; float s5;
+                         float s6; float s7; float s8; float s9; float sa; float sb; float sc;} float13;
+typedef struct float14 { float s0; float s1; float s2; float s3; float s4; float s5;
+                         float s6; float s7; float s8; float s9; float sa; float sb; float sc; float sd; } float14;
+typedef struct float15 { float s0; float s1; float s2; float s3; float s4; float s5;
+                         float s6; float s7; float s8; float s9; float sa; float sb; float sc; float sd; float se; } float15;
+typedef struct float0 { float s0; } float0; //never used but makes compiler happy.
+
+#define OUT_PITCH_X output_width
+#define ROW_PITCH input_width
+
+#define GEMM_LIKE_KERNEL_ARGS     \
+    const __global Dtype *src0,   \
+    const __global Dtype *src1,   \
+    BIAS_KERNEL_ARG               \
+    __global Dtype *dst,          \
+    const ushort input_width,     \
+    const ushort input_height,    \
+    const ushort output_width,    \
+    const ushort output_height,   \
+    const int out_pitch_y,     \
+    const int out_pitch_z,     \
+    const int aligned_input_size, \
+    const int slice_pitch
+#endif
+
+#ifdef GEMM_LIKE_CONV_32_1
+//////////////////////////////////////////////////////////////////////////////
+// Conv_Interleaved_32_1_flex
+//
+// Convolution: each workitem computes 1 patch x 32 filters worth of output
+// data.  Kernel's inner loop works on a single tile consisting of one
+// row from each patch and the filter data corresponding to that row.  Filter
+// matrix is interleaved to reduce GRF bank conflicts.  Patches are walked
+// by rows and then by slices.  Relies on sub_group extension for block
+// reads and SIMD broadcast.  Allows flexible sizing of TILE width (TILE_N)
+// by dynamically selecting one of two code paths: one uses TILE_N = 32 and
+// the other uses TILE_N = 8, 16, or 24.
+#define TILE_M          1
+#define TILE_K          KERNEL_WIDTH
+#define TILE_N          32
+
+#ifndef __BEIGNET__
+__attribute__((intel_reqd_sub_group_size(8)))
+#endif
+__kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
+{
+    const int group_x = get_group_id(0);
+    const int group_y = get_group_id(1);
+    const int global_x = get_global_id(0);
+    const int global_y = get_global_id(1);
+    const int global_z = get_global_id(2);
+    int interleaved_y;
+    int kernel_y;
+    int kernel_idx;
+
+#define DOT_PRODUCT_8( _result, _rowA, colB )    \
+    {   \
+        _result.s0 = mad( _rowA, sub_group_broadcast( colB, 0 ), _result.s0 );  \
+        _result.s1 = mad( _rowA, sub_group_broadcast( colB, 1 ), _result.s1 );  \
+        _result.s2 = mad( _rowA, sub_group_broadcast( colB, 2 ), _result.s2 );  \
+        _result.s3 = mad( _rowA, sub_group_broadcast( colB, 3 ), _result.s3 );  \
+        _result.s4 = mad( _rowA, sub_group_broadcast( colB, 4 ), _result.s4 );  \
+        _result.s5 = mad( _rowA, sub_group_broadcast( colB, 5 ), _result.s5 );  \
+        _result.s6 = mad( _rowA, sub_group_broadcast( colB, 6 ), _result.s6 );  \
+        _result.s7 = mad( _rowA, sub_group_broadcast( colB, 7 ), _result.s7 );  \
+    }
+    typedef CAT( Dtype, KERNEL_WIDTH ) Dtype_t;
+
+    // True for all threads if filter_width is multiple of TILE_N
+    // else, true for all but right-most column of threads.
+    if( TILE_N_LAST == 0 || global_x < WIDTH1 / TILE_N )
+    {
+        // Result ctile (*dst) is M rows x N columns
+        // LWG size is 1x8.  Thus each thread calculates 8*M rows x N cols of ctile.
+        Dtype8  blockC00 = 0.f;
+        Dtype8  blockC10 = 0.f;
+        Dtype8  blockC20 = 0.f;
+        Dtype8  blockC30 = 0.f;
+
+        // Src0 (patch input) is directly used as atile.
+        // Each work item points to the start of a different patch.
+        // atile is M rows x K columns.
+        int curr_x = ( global_y % output_width ) * STRIDE_X;
+        int curr_y = ( global_y / output_width ) * STRIDE_Y;
+#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
+        int saved_y = curr_y;
+#endif
+        const __global Dtype *src0_read = src0
+          + aligned_input_size * global_z                            // batch offset
+          + (curr_y - INPUT_PAD_H) * ROW_PITCH      // y offset
+          + (curr_x - INPUT_PAD_W);                 // x offset
+
+        // Src1 (filter) is directly used as btile.
+        // It starts at the top of src1 and walks down.
+        // btile is K rows x N columns.
+        const __global Dtype *src1_read = src1 + ( global_x * TILE_N  * 2);
+
+        // Walk DOWN src0 (patch 0, 1, 2, ...) and DOWN src1.
+        // Inner loop loads and FMADs one row (KERNEL_WIDTH) of each input patch
+        // and KERNEL_WIDTH/2 rows of interleaved filter.
+        int patch_depth = 0;
+        do
+        {
+            int patch_row = 0;
+#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
+            curr_y = saved_y;
+#endif
+
+            do
+            {
+                // Load atile and btile.
+                // Kernel data is partially interleaved.  Every 2 rows are interleaved at Dtype8 granularity.
+                // The exception is that if KERNEL_WIDTH is odd the last row is not interleaved.  The non
+                // interleaved row is padded with zero to ensure same size as interleaved rows. This
+                // interleaving is done to ensure 0% GDR bank conflicts.  For example, this is how the
+                // kernel data would be arranged before/after interleaving for KERNEL_WIDTH=3.
+                // (0, 0) (8, 0) (16, 0) (24, 0) ...       (0, 0) (0, 1) (8, 0) (0, 1) (16, 0) (0, 1) (24, 0) ..
+                // (0, 1) (8, 1) (16, 1) (24, 1) ... =>    (0, 2) (8, 2) (16, 2) (24, 2) ...
+                // (0, 2) (8, 2) (16, 2) (24, 2) ...       ...
+                // ...
+                const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
+
+#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1
+                Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[  0  ];
+                Dtype*  pblockA00 = (Dtype*)(&blockA00);
+#else
+                Dtype_t blockA00;
+                Dtype*  pblockA00 = (Dtype*)(&blockA00);
+                int pos = 0;
+                LOOP(KERNEL_WIDTH, pos,
+                {
+                  if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
+                    pblockA00[pos] = src0_read[pos * DILATION_X];
+                  else
+                    pblockA00[pos] = 0;
+                })
+                curr_y += DILATION_Y;
+#endif
+                src0_read += (ROW_PITCH * DILATION_Y);
+
+                Dtype blockB00[KERNEL_WIDTH*4];
+                Dtype8* p8BlockB00 = (Dtype8*)blockB00;
+                Dtype4* p4BlockB00 = (Dtype4*)blockB00;
+                Dtype*  pBlockB00 =  (Dtype* )blockB00;
+
+                interleaved_y = 0;
+                LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+                {
+                    p8BlockB00[interleaved_y] = as_Dtype8( SUB_GROUP_BLOCK_READ8( (const __global INT_TYPE *)src1_read ) );
+                    src1_read += WIDTH1 * 2;
+                } )
+                if ( kernel_width_is_odd )
+                {
+                    p4BlockB00[KERNEL_WIDTH - 1] = as_Dtype4( SUB_GROUP_BLOCK_READ4( (const __global INT_TYPE *)src1_read ) );
+                    src1_read += WIDTH1 * 2;
+                }
+
+                // Perform MADs
+                kernel_idx = 0;
+                interleaved_y = 0;
+                LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+                {
+                    kernel_y = interleaved_y * 2;
+                    DOT_PRODUCT_8( blockC00, pblockA00[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC00, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC10, pblockA00[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC10, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC20, pblockA00[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC20, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC30, pblockA00[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC30, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                } )
+                    kernel_y = interleaved_y * 2;
+                if ( kernel_width_is_odd )
+                {
+                    DOT_PRODUCT_8( blockC00, pblockA00[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC10, pblockA00[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC20, pblockA00[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC30, pblockA00[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                }
+            }
+
+            //while( ++patch_row < 1 ); //debug
+            while( ++patch_row < KERNEL_HEIGHT );
+
+            src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y); // reset to start of next slice of patch
+        }
+        //while ( ++patch_depth < 1 ); //debug
+        while ( ++patch_depth < INPUT_DEPTH );
+
+        // Dst resembles a cube of width x height x (output channel * batches).  Each tile writes:
+        // (SIMD * TILE_M) x 1 x TILE_N.  Partial writes most likely generated if padding used.
+        int out_offset = global_z * out_pitch_z                                                   // batch offset
+         + ( group_x * TILE_N ) * out_pitch_y                                       // channel offset
+         + ( ( global_y * TILE_M ) / output_width + OUT_PADDING_HEIGHT) * OUT_PITCH_X  // y offset
+         + ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT;               // x offset
+
+        __global Dtype *out = dst + out_offset;
+#if APPLY_BIAS
+        Dtype bias[4];
+        Dtype4 *bias_vec;
+        bias_vec = (Dtype4*)bias;
+        *bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
+#endif
+#ifdef FUSED_CONV_CHANNEL_RELU
+        Dtype slope[4];
+        Dtype4 *slope_vec;
+        slope_vec = (Dtype4*)slope;
+        *slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
+        Dtype negative_slope;
+#endif
+        if (global_y * TILE_M < output_width * output_height )
+        {
+            for (int i = 0; i < 8; i++)
+            {
+#ifdef FUSED_CONV_CHANNEL_RELU
+            negative_slope = intel_sub_group_shuffle(slope[0], i);
+#endif
+            ACTIVATION_FUNCTION(dst, out_offset + ( 0 + i ) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i));
+
+#ifdef FUSED_CONV_CHANNEL_RELU
+            negative_slope = intel_sub_group_shuffle(slope[1], i);
+#endif
+            ACTIVATION_FUNCTION(dst, out_offset + ( 8 + i ) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i));
+#ifdef FUSED_CONV_CHANNEL_RELU
+            negative_slope = intel_sub_group_shuffle(slope[2], i);
+#endif
+            ACTIVATION_FUNCTION(dst, out_offset + ( 16 + i ) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i));
+#ifdef FUSED_CONV_CHANNEL_RELU
+            negative_slope = intel_sub_group_shuffle(slope[3], i);
+#endif
+            ACTIVATION_FUNCTION(dst, out_offset + ( 24 + i ) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i));
+            }
+        }
+    }
+#if TILE_N_LAST > 0
+    else
+    {
+
+        // Result ctile (*dst) is M rows x N columns
+        // LWG size is 1x8.  Thus each thread calculates 8*M rows x N cols of ctile.
+        int i = 0;
+        Dtype8  blockC[TILE_N_LAST_DIV8];
+        LOOP(TILE_N_LAST_DIV8, i,
+        {
+            blockC[i] = 0.f;
+        } )
+
+        // Src0 (patch input) is directly used as atile.
+        // Each work item points to the start of a different patch.
+        // atile is M rows x K columns.
+        int curr_x = ( global_y % output_width ) * STRIDE_X;
+        int curr_y = ( global_y / output_width ) * STRIDE_Y;
+#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
+        int saved_y = curr_y;
+#endif
+        const __global Dtype *src0_read = src0
+          + aligned_input_size * global_z                            // batch offset
+          + (curr_y - INPUT_PAD_H) * ROW_PITCH      // y offset
+          + (curr_x - INPUT_PAD_W);                 // x offset
+
+        // Src1 (filter) is directly used as btile.
+        // It starts at the top of src1 and walks down.
+        // btile is K rows x N columns.
+        const __global Dtype *src1_read = src1 + ( global_x * TILE_N  * 2);
+
+        // Walk DOWN src0 (patch 0, 1, 2, ...) and DOWN src1.
+        // Inner loop loads and FMADs one row (KERNEL_WIDTH) of each input patch
+        // and KERNEL_WIDTH/2 rows of interleaved filter.
+        int patch_depth = 0;
+        do
+        {
+            int patch_row = 0;
+#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
+            curr_y = saved_y;
+#endif
+            do
+            {
+                // Load atile and interleaved btile.
+                const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
+#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1
+                Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[  0  ];
+                Dtype*  pblockA00 = (Dtype*)(&blockA00);
+#else
+                Dtype_t blockA00;
+                Dtype*  pblockA00 = (Dtype*)(&blockA00);
+                int pos = 0;
+                LOOP(KERNEL_WIDTH, pos,
+                {
+                  if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
+                    pblockA00[pos] = src0_read[pos * DILATION_X];
+                  else
+                    pblockA00[pos] = 0;
+                })
+                curr_y += DILATION_Y;
+#endif
+                src0_read += (ROW_PITCH * DILATION_Y);
+                Dtype blockB[KERNEL_WIDTH * TILE_N_LAST_DIV8];
+
+                interleaved_y = 0;
+                LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+                {
+#if TILE_N_LAST_DIV8 == 1
+                    Dtype2* p2BlockB = (Dtype2* )blockB;
+                    p2BlockB[interleaved_y] = as_Dtype2( SUB_GROUP_BLOCK_READ2( (const __global INT_TYPE*)src1_read ) );
+#elif TILE_N_LAST_DIV8 == 2
+                    Dtype4* p4BlockB = (Dtype4* )blockB;
+                    p4BlockB[interleaved_y] = as_Dtype4( SUB_GROUP_BLOCK_READ4( (const __global INT_TYPE*)src1_read ) );
+#elif TILE_N_LAST_DIV8 == 3
+                    //TODO: broken.  No block_read6
+                    Dtype6* p6BlockB = (Dtype6* )blockB;
+                    (*((Dtype8*)(&p6BlockB[interleaved_y]))).s0123 = as_Dtype4( SUB_GROUP_BLOCK_READ4( (const __global INT_TYPE*)src1_read ) );
+                    (*((Dtype8*)(&p6BlockB[interleaved_y]))).s45 = as_Dtype2( SUB_GROUP_BLOCK_READ2( (const __global INT_TYPE*)(src1_read + 4 * 8) ) );
+#endif
+                    src1_read += WIDTH1 * 2;
+                } )
+                if ( kernel_width_is_odd )
+                {
+#if TILE_N_LAST_DIV8 == 1
+                    Dtype* pBlockB = (Dtype* )blockB;
+                    pBlockB[KERNEL_WIDTH - 1] = as_Dtype( SUB_GROUP_BLOCK_READ( (const __global INT_TYPE*)src1_read ) );
+#elif TILE_N_LAST_DIV8 == 2
+                    Dtype2* p2BlockB = (Dtype2* )blockB;
+                    p2BlockB[KERNEL_WIDTH - 1] = as_Dtype2( SUB_GROUP_BLOCK_READ2( (const __global INT_TYPE*)src1_read ) );
+#elif TILE_N_LAST_DIV8 == 3
+                    Dtype3* p3BlockB = (Dtype3* )blockB;
+                    p3BlockB[KERNEL_WIDTH - 1].s01 = as_Dtype2( SUB_GROUP_BLOCK_READ2( (const __global INT_TYPE*)src1_read ) );
+                    p3BlockB[KERNEL_WIDTH - 1].s2 = as_Dtype( SUB_GROUP_BLOCK_READ( (const __global INT_TYPE*) (src1_read + 2 * 8) ) );
+#endif
+                    src1_read += WIDTH1 * 2;
+                }
+
+                // Perform MADs
+                Dtype* pBlockB = (Dtype*)blockB;
+                kernel_idx = 0;
+                interleaved_y = 0;
+                LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+                {
+                    kernel_y = interleaved_y * 2;
+                    DOT_PRODUCT_8( blockC[0], pblockA00[kernel_y    ], pBlockB[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC[0], pblockA00[kernel_y + 1], pBlockB[kernel_idx] ); kernel_idx++;
+#if TILE_N_LAST_DIV8 >= 2
+                    DOT_PRODUCT_8( blockC[1], pblockA00[kernel_y    ], pBlockB[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC[1], pblockA00[kernel_y + 1], pBlockB[kernel_idx] ); kernel_idx++;
+#if TILE_N_LAST_DIV8 >= 3
+                    DOT_PRODUCT_8( blockC[2], pblockA00[kernel_y    ], pBlockB[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC[2], pblockA00[kernel_y + 1], pBlockB[kernel_idx] ); kernel_idx++;
+#endif
+#endif
+                } )
+                kernel_y = interleaved_y * 2;
+                if ( kernel_width_is_odd )
+                {
+                    DOT_PRODUCT_8( blockC[0], pblockA00[kernel_y], pBlockB[kernel_idx] ); kernel_idx++;
+#if TILE_N_LAST_DIV8 >= 2
+                    DOT_PRODUCT_8( blockC[1], pblockA00[kernel_y], pBlockB[kernel_idx] ); kernel_idx++;
+#if TILE_N_LAST_DIV8 >= 3
+                    DOT_PRODUCT_8( blockC[2], pblockA00[kernel_y], pBlockB[kernel_idx] ); kernel_idx++;
+#endif
+#endif
+                }
+            }
+
+            //while( ++patch_row < 1 ); //debug
+            while( ++patch_row < KERNEL_HEIGHT );
+
+            src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
+        }
+        //while ( ++patch_depth < 1 );  //debug
+        while ( ++patch_depth < INPUT_DEPTH );
+
+        // Dst resembles a cube of width x height x (output channel * batches).  Each tile writes:
+        // (SIMD * TILE_M) x 1 x TILE_N.  Partial writes are likely to be generated when padding is used.
+        int out_offset = global_z * out_pitch_z                                                   // batch offset
+         + ( group_x * TILE_N ) * out_pitch_y                                       // channel offset
+         + ( ( global_y * TILE_M ) / output_width + OUT_PADDING_HEIGHT) * OUT_PITCH_X  // y offset
+         + ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT;               // x offset
+        __global Dtype *out = dst + out_offset;
+#if APPLY_BIAS
+        Dtype bias[4];
+        Dtype4 *bias_vec;
+        bias_vec = (Dtype4*)bias;
+        *bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
+#endif
+
+#ifdef FUSED_CONV_CHANNEL_RELU
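+        // Per-channel PReLU slopes for this TILE_N block; the slope for
+        // channel j*8+i is shuffled out of slope[j] lane i in the store loop.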
+        Dtype slope[4];
+        Dtype4 *slope_vec;
+        slope_vec = (Dtype4*)slope;
+        *slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
+        Dtype negative_slope;
+#endif
+
+        if (global_y * TILE_M < output_width * output_height )
+        {
+            for (int i = 0; i < 8; i++)
+            {
+                if ( TILE_N_LAST_DIV8 > 0 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[0], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out_offset + ( 0+i) * out_pitch_y, blockC[0][i] + SUBGROUP_GET_BIAS(0, i));
+                }
+                if ( TILE_N_LAST_DIV8 > 1 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[1], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out_offset + ( 8+i) * out_pitch_y, blockC[1][i] + SUBGROUP_GET_BIAS(1, i));
+                }
+                if ( TILE_N_LAST_DIV8 > 2 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[2], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out_offset + (16+i) * out_pitch_y, blockC[2][i] + SUBGROUP_GET_BIAS(2, i));
+                }
+                if ( TILE_N_LAST_DIV8 > 3 )
+                {
+
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[3], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out_offset + (24+i) * out_pitch_y, blockC[3][i] + SUBGROUP_GET_BIAS(3, i));
+                }
+            }
+        }
+    }
+#endif
+}
+#endif
+#ifdef GEMM_LIKE_CONV_32_2
+
+//////////////////////////////////////////////////////////////////////////////
+// Conv_Interleaved_32_2_flex
+//
+// Convolution: each workitem computes 2 patches x 32 filters worth of output
+// data.  Kernel's inner loop works on a single tile consisting of one
+// row from each patch and the filter data corresponding to that row.  Filter
+// matrix is interleaved to reduce GRF bank conflicts.  Patches are walked
+// by rows and then by slices.  Relies on sub_group extension for block
+// reads and SIMD broadcast.  Allows flexible sizing of TILE width (TILE_N)
+// by dynamically selecting one of two code paths: one uses TILE_N = 32 and
+// the other uses TILE_N = 8, 16, or 24.
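+//
+// Per work item this kernel accumulates TILE_M = 2 output pixels x
+// TILE_N = 32 output channels, held in eight Dtype8 accumulators
+// (blockC00..blockC31).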
+#define TILE_M          2
+#define TILE_K          KERNEL_WIDTH
+#define TILE_N          32
+
+#ifndef __BEIGNET__
+__attribute__((intel_reqd_sub_group_size(8)))
+#endif
+__kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
+{
+    const int group_x = get_group_id(0);
+    const int group_y = get_group_id(1);
+    const int global_x = get_global_id(0);
+    const int global_y = get_global_id(1);
+    const int global_z = get_global_id(2);
+    int interleaved_y;
+    int kernel_y;
+    int kernel_idx;
+
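+// DOT_PRODUCT_8 accumulates one input value against eight filter weights:
+// lane i of colB (fetched via sub_group_broadcast) holds the weight for
+// output channel i, so a single call updates all eight channels in _result.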
+#define DOT_PRODUCT_8( _result, _rowA, colB )    \
+    {   \
+        _result.s0 = mad( _rowA, sub_group_broadcast( colB, 0 ), _result.s0 );  \
+        _result.s1 = mad( _rowA, sub_group_broadcast( colB, 1 ), _result.s1 );  \
+        _result.s2 = mad( _rowA, sub_group_broadcast( colB, 2 ), _result.s2 );  \
+        _result.s3 = mad( _rowA, sub_group_broadcast( colB, 3 ), _result.s3 );  \
+        _result.s4 = mad( _rowA, sub_group_broadcast( colB, 4 ), _result.s4 );  \
+        _result.s5 = mad( _rowA, sub_group_broadcast( colB, 5 ), _result.s5 );  \
+        _result.s6 = mad( _rowA, sub_group_broadcast( colB, 6 ), _result.s6 );  \
+        _result.s7 = mad( _rowA, sub_group_broadcast( colB, 7 ), _result.s7 );  \
+    }
+    typedef CAT( Dtype, KERNEL_WIDTH ) Dtype_t;
+
+    // True for all threads if the filter count (WIDTH1) is a multiple of TILE_N;
+    // otherwise true for all but the right-most column of threads.
+    if( TILE_N_LAST == 0 || global_x < WIDTH1 / TILE_N )
+    {
+        // Result ctile (*dst) is M rows x N columns
+        // LWG size is 1x8.  Thus each thread calculates 8*M rows x N cols of ctile.
+        Dtype8  blockC00 = 0.f;
+        Dtype8  blockC10 = 0.f;
+        Dtype8  blockC20 = 0.f;
+        Dtype8  blockC30 = 0.f;
+        Dtype8  blockC01 = 0.f;
+        Dtype8  blockC11 = 0.f;
+        Dtype8  blockC21 = 0.f;
+        Dtype8  blockC31 = 0.f;
+
+        // Src0 (patch input) is directly used as atile.
+        // Each work item points to the start of a different patch.
+        // atile is M rows x K columns.
+        int curr_x0 = ( ( global_y * TILE_M + 0 ) % output_width ) * STRIDE_X;
+        int curr_x1 = ( ( global_y * TILE_M + 1 ) % output_width ) * STRIDE_X;
+        int curr_y0 = ( ( global_y * TILE_M + 0 ) / output_width ) * STRIDE_Y;
+        int curr_y1 = ( ( global_y * TILE_M + 1 ) / output_width ) * STRIDE_Y;
+#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
+        int saved_y0 = curr_y0;
+        int saved_y1 = curr_y1;
+#endif
+        const __global Dtype *src0_read0 = src0
+         + aligned_input_size * global_z                                            // batch offset
+         + (curr_y0 - INPUT_PAD_H) * ROW_PITCH   // y offset
+         + curr_x0 - INPUT_PAD_W;                // x offset
+        const __global Dtype *src0_read1 = src0
+         + aligned_input_size * global_z                                            // batch offset
+         + (curr_y1 - INPUT_PAD_H) * ROW_PITCH   // y offset
+         + curr_x1 - INPUT_PAD_W;                // x offset
+
+        // Src1 (filter) is directly used as btile.
+        // It starts at the top of src1 and walks down.
+        // btile is K rows x N columns.
+        const __global Dtype *src1_read = src1 + ( global_x * TILE_N * 2);
+
+        // Walk DOWN src0 (patch 0, 1, 2, ...) and DOWN src1.
+        // Inner loop loads and FMADs one row (KERNEL_WIDTH) of each input patch
+        // and KERNEL_WIDTH/2 rows of interleaved filter.
+        int patch_depth = 0;
+        do
+        {
+            int patch_row = 0;
+            do
+            {
+                // Load atile and btile.
+                // Kernel data is partially interleaved: every 2 rows are interleaved at Dtype8 granularity.
+                // The exception is that if KERNEL_WIDTH is odd, the last row is not interleaved.  The non-
+                // interleaved row is padded with zero so it has the same size as the interleaved rows.  This
+                // interleaving is done to avoid GRF bank conflicts.  For example, this is how the
+                // kernel data would be arranged before/after interleaving for KERNEL_WIDTH=3.
+                // (0, 0) (8, 0) (16, 0) (24, 0) ...       (0, 0) (0, 1) (8, 0) (8, 1) (16, 0) (16, 1) (24, 0) ...
+                // (0, 1) (8, 1) (16, 1) (24, 1) ... =>    (0, 2) (8, 2) (16, 2) (24, 2) ...
+                // (0, 2) (8, 2) (16, 2) (24, 2) ...       ...
+                // ...
+                const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
+#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1
+                Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read0 )[  0  ]; src0_read0 += ROW_PITCH;
+                Dtype_t blockA01 = ( (const __global Dtype_t*)src0_read1 )[  0  ]; src0_read1 += ROW_PITCH;
+                Dtype*  pblockA00 = (Dtype*)(&blockA00);
+                Dtype*  pblockA01 = (Dtype*)(&blockA01);
+#else
+                Dtype_t blockA00;
+                Dtype*  pblockA00 = (Dtype*)(&blockA00);
+                int pos = 0;
+                LOOP(KERNEL_WIDTH, pos,
+                {
+                  if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
+                    pblockA00[pos] = src0_read0[pos * DILATION_X];
+                  else
+                    pblockA00[pos] = 0;
+                })
+                curr_y0 += DILATION_Y;
+                Dtype_t blockA01;
+                Dtype*  pblockA01 = (Dtype*)(&blockA01);
+                pos = 0;
+                LOOP(KERNEL_WIDTH, pos,
+                {
+                  if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
+                    pblockA01[pos] = src0_read1[pos * DILATION_X];
+                  else
+                    pblockA01[pos] = 0;
+                })
+                curr_y1 += DILATION_Y;
+                src0_read0 += (ROW_PITCH * DILATION_Y);
+                src0_read1 += (ROW_PITCH * DILATION_Y);
+#endif
+                Dtype blockB00[KERNEL_WIDTH*4];
+                Dtype8* p8BlockB00 = (Dtype8*)blockB00;
+                Dtype4* p4BlockB00 = (Dtype4*)blockB00;
+                Dtype*  pBlockB00 =  (Dtype* )blockB00;
+
+                interleaved_y = 0;
+                LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+                {
+                    p8BlockB00[interleaved_y] = as_Dtype8( SUB_GROUP_BLOCK_READ8( (const __global INT_TYPE*)src1_read ) );
+                    src1_read += WIDTH1 * 2;
+                } )
+                if ( kernel_width_is_odd )
+                {
+                    p4BlockB00[KERNEL_WIDTH - 1] = as_Dtype4( SUB_GROUP_BLOCK_READ4( (const __global INT_TYPE*)src1_read ) );
+                    src1_read += WIDTH1 * 2;
+                }
+                // Perform MADs
+                kernel_idx = 0;
+                interleaved_y = 0;
+                LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+                {
+                    kernel_y = interleaved_y * 2;
+                    DOT_PRODUCT_8( blockC00, pblockA00[kernel_y    ], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC01, pblockA01[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC00, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC01, pblockA01[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC10, pblockA00[kernel_y    ], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC11, pblockA01[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC10, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC11, pblockA01[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC20, pblockA00[kernel_y    ], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC21, pblockA01[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC20, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC21, pblockA01[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC30, pblockA00[kernel_y    ], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC31, pblockA01[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC30, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC31, pblockA01[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                } )
+                if ( kernel_width_is_odd )
+                {
+                    kernel_y = interleaved_y * 2;
+                    DOT_PRODUCT_8( blockC00, pblockA00[kernel_y], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC01, pblockA01[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC10, pblockA00[kernel_y], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC11, pblockA01[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC20, pblockA00[kernel_y], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC21, pblockA01[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC30, pblockA00[kernel_y], pBlockB00[kernel_idx] );
+                    DOT_PRODUCT_8( blockC31, pblockA01[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                }
+            }
+
+            //while( ++patch_row < 1 ); //debug
+            while( ++patch_row < KERNEL_HEIGHT );
+#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || DILATION_X != 1 || DILATION_Y != 1
+            curr_y0 = saved_y0;
+            curr_y1 = saved_y1;
+#endif
+            src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
+            src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
+        }
+        //while ( ++patch_depth < 1 );  //debug
+        while ( ++patch_depth < INPUT_DEPTH );
+
+        // Dst resembles a cube of width x height x (output channel * batches).  Each tile writes:
+        // (SIMD * TILE_M) x 1 x TILE_N.  Partial writes are likely to be generated when padding is used.
+        int out0_offset = global_z * out_pitch_z                                                       // batch offset
+         + ( group_x * TILE_N ) * out_pitch_y                                           // channel offset
+         + ( ( global_y * TILE_M + 0 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
+         + ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT;               // x offset
+        int out1_offset = global_z * out_pitch_z                                                       // batch offset
+         + ( group_x * TILE_N ) * out_pitch_y                                           // channel offset
+         + ( ( global_y * TILE_M + 1 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
+         + ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT;               // x offset
+
+#if APPLY_BIAS
+        Dtype bias[4];
+        Dtype4 *bias_vec;
+        bias_vec = (Dtype4*)bias;
+        *bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
+#endif
+
+#ifdef FUSED_CONV_CHANNEL_RELU
+        Dtype slope[4];
+        Dtype4 *slope_vec;
+        slope_vec = (Dtype4*)slope;
+        *slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
+        Dtype negative_slope;
+#endif
+
+        if( global_y * TILE_M < output_width * output_height )
+        {
+            for( int i = 0; i < 8; i++ )
+            {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                negative_slope = intel_sub_group_shuffle(slope[0], i);
+#endif
+                ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC00[i] + SUBGROUP_GET_BIAS(0, i));
+#ifdef FUSED_CONV_CHANNEL_RELU
+                negative_slope = intel_sub_group_shuffle(slope[1], i);
+#endif
+                ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC10[i] + SUBGROUP_GET_BIAS(1, i));
+#ifdef FUSED_CONV_CHANNEL_RELU
+                negative_slope = intel_sub_group_shuffle(slope[2], i);
+#endif
+                ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC20[i] + SUBGROUP_GET_BIAS(2, i));
+#ifdef FUSED_CONV_CHANNEL_RELU
+                negative_slope = intel_sub_group_shuffle(slope[3], i);
+#endif
+                ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC30[i] + SUBGROUP_GET_BIAS(3, i));
+            }
+        }
+        if( global_y * TILE_M + 1 < output_width * output_height )
+        {
+            for( int i = 0; i < 8; i++ )
+            {
+
+#ifdef FUSED_CONV_CHANNEL_RELU
+                negative_slope = intel_sub_group_shuffle(slope[0], i);
+#endif
+                ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC01[i] + SUBGROUP_GET_BIAS(0, i));
+
+#ifdef FUSED_CONV_CHANNEL_RELU
+                negative_slope = intel_sub_group_shuffle(slope[1], i);
+#endif
+                ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC11[i] + SUBGROUP_GET_BIAS(1, i));
+
+#ifdef FUSED_CONV_CHANNEL_RELU
+                negative_slope = intel_sub_group_shuffle(slope[2], i);
+#endif
+                ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC21[i] + SUBGROUP_GET_BIAS(2, i));
+
+#ifdef FUSED_CONV_CHANNEL_RELU
+                negative_slope = intel_sub_group_shuffle(slope[3], i);
+#endif
+                ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC31[i] + SUBGROUP_GET_BIAS(3, i));
+            }
+        }
+    }
+#if TILE_N_LAST > 0
+    else
+    {
+
+        // Result ctile (*dst) is M rows x N columns
+        // LWG size is 1x8.  Thus each thread calculates 8*M rows x N cols of ctile.
+        int i = 0;
+        Dtype8  blockC0[TILE_N_LAST_DIV8];
+        Dtype8  blockC1[TILE_N_LAST_DIV8];
+        LOOP(TILE_N_LAST_DIV8, i,
+        {
+            blockC0[i] = 0.f;
+            blockC1[i] = 0.f;
+        } )
+
+        // Src0 (patch input) is directly used as atile.
+        // Each work item points to the start of a different patch.
+        // atile is M rows x K columns.
+        int curr_x0 = ( ( global_y * TILE_M + 0 ) % output_width ) * STRIDE_X;
+        int curr_x1 = ( ( global_y * TILE_M + 1 ) % output_width ) * STRIDE_X;
+        int curr_y0 = ( ( global_y * TILE_M + 0 ) / output_width ) * STRIDE_Y;
+        int curr_y1 = ( ( global_y * TILE_M + 1 ) / output_width ) * STRIDE_Y;
+#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
+        int saved_y0 = curr_y0;
+        int saved_y1 = curr_y1;
+#endif
+        const __global Dtype *src0_read0 = src0
+         + aligned_input_size * global_z                                            // batch offset
+         + (curr_y0 - INPUT_PAD_H) * ROW_PITCH   // y offset
+         + curr_x0 - INPUT_PAD_W;                // x offset
+        const __global Dtype *src0_read1 = src0
+         + aligned_input_size * global_z                                            // batch offset
+         + (curr_y1 - INPUT_PAD_H) * ROW_PITCH   // y offset
+         + curr_x1 - INPUT_PAD_W;                // x offset
+
+        // Src1 (filter) is directly used as btile.
+        // It starts at the top of src1 and walks down.
+        // btile is K rows x N columns.
+        const __global Dtype *src1_read = src1 + ( global_x * TILE_N  * 2);
+
+        // Walk DOWN src0 (patch 0, 1, 2, ...) and DOWN src1.
+        // Inner loop loads and FMADs one row (KERNEL_WIDTH) of each input patch
+        // and KERNEL_WIDTH/2 rows of interleaved filter.
+        int patch_depth = 0;
+        do
+        {
+            int patch_row = 0;
+            do
+            {
+                // Load atile and interleaved btile.
+                const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
+#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1
+                Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read0 )[  0  ]; src0_read0 += ROW_PITCH;
+                Dtype_t blockA01 = ( (const __global Dtype_t*)src0_read1 )[  0  ]; src0_read1 += ROW_PITCH;
+                Dtype*  pblockA00 = (Dtype*)(&blockA00);
+                Dtype*  pblockA01 = (Dtype*)(&blockA01);
+#else
+                Dtype_t blockA00;
+                Dtype*  pblockA00 = (Dtype*)(&blockA00);
+                int pos = 0;
+                LOOP(KERNEL_WIDTH, pos,
+                {
+                  if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
+                    pblockA00[pos] = src0_read0[pos * DILATION_X];
+                  else
+                    pblockA00[pos] = 0;
+                })
+                curr_y0 += DILATION_Y;
+                Dtype_t blockA01;
+                Dtype*  pblockA01 = (Dtype*)(&blockA01);
+                pos = 0;
+                LOOP(KERNEL_WIDTH, pos,
+                {
+                  if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
+                    pblockA01[pos] = src0_read1[pos * DILATION_X];
+                  else
+                    pblockA01[pos] = 0;
+                })
+                curr_y1 += DILATION_Y;
+                src0_read0 += (ROW_PITCH * DILATION_Y);
+                src0_read1 += (ROW_PITCH * DILATION_Y);
+#endif
+                Dtype blockB[KERNEL_WIDTH * TILE_N_LAST_DIV8];
+
+                interleaved_y = 0;
+                LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+                {
+#if TILE_N_LAST_DIV8 == 1
+                    Dtype2* p2BlockB = (Dtype2* )blockB;
+                    p2BlockB[interleaved_y] = as_Dtype2( SUB_GROUP_BLOCK_READ2( (const __global INT_TYPE*)src1_read ) );
+#elif TILE_N_LAST_DIV8 == 2
+                    Dtype4* p4BlockB = (Dtype4* )blockB;
+                    p4BlockB[interleaved_y] = as_Dtype4( SUB_GROUP_BLOCK_READ4( (const __global INT_TYPE*)src1_read ) );
+#elif TILE_N_LAST_DIV8 == 3
+                    //TODO: broken.  No block_read6
+                    Dtype6* p6BlockB = (Dtype6* )blockB;
+                    (*((Dtype8*)(&p6BlockB[interleaved_y]))).s0123 = as_Dtype4( SUB_GROUP_BLOCK_READ4( (const __global INT_TYPE*)src1_read ) );
+                    (*((Dtype8*)(&p6BlockB[interleaved_y]))).s45 = as_Dtype2( SUB_GROUP_BLOCK_READ2( (const __global INT_TYPE*)(src1_read + 4 * 8) ) );
+#endif
+                    src1_read += WIDTH1 * 2;
+                } )
+                if ( kernel_width_is_odd )
+                {
+#if TILE_N_LAST_DIV8 == 1
+                    Dtype* pBlockB = (Dtype* )blockB;
+                    pBlockB[KERNEL_WIDTH - 1] = as_Dtype( SUB_GROUP_BLOCK_READ( (const __global INT_TYPE*)src1_read ) );
+#elif TILE_N_LAST_DIV8 == 2
+                    Dtype2* p2BlockB = (Dtype2* )blockB;
+                    p2BlockB[KERNEL_WIDTH - 1] = as_Dtype2( SUB_GROUP_BLOCK_READ2( (const __global INT_TYPE*)src1_read ) );
+#elif TILE_N_LAST_DIV8 == 3
+                    Dtype3* p3BlockB = (Dtype3* )blockB;
+                    p3BlockB[KERNEL_WIDTH - 1].s01 = as_Dtype2( SUB_GROUP_BLOCK_READ2( (const __global INT_TYPE*)src1_read ) );
+                    p3BlockB[KERNEL_WIDTH - 1].s2 = as_Dtype( SUB_GROUP_BLOCK_READ( (const __global INT_TYPE*) (src1_read + 2 * 8) ) );
+#endif
+                    src1_read += WIDTH1 * 2;
+                }
+
+                // Perform MADs
+                Dtype* pBlockB = (Dtype*)blockB;
+                kernel_idx = 0;
+                interleaved_y = 0;
+                LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+                {
+                    kernel_y = interleaved_y * 2;
+                    DOT_PRODUCT_8( blockC0[0], pblockA00[kernel_y    ], pBlockB[kernel_idx] );
+                    DOT_PRODUCT_8( blockC1[0], pblockA01[kernel_y    ], pBlockB[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC0[0], pblockA00[kernel_y + 1], pBlockB[kernel_idx] );
+                    DOT_PRODUCT_8( blockC1[0], pblockA01[kernel_y + 1], pBlockB[kernel_idx] ); kernel_idx++;
+#if TILE_N_LAST_DIV8 >= 2
+                    DOT_PRODUCT_8( blockC0[1], pblockA00[kernel_y    ], pBlockB[kernel_idx] );
+                    DOT_PRODUCT_8( blockC1[1], pblockA01[kernel_y    ], pBlockB[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC0[1], pblockA00[kernel_y + 1], pBlockB[kernel_idx] );
+                    DOT_PRODUCT_8( blockC1[1], pblockA01[kernel_y + 1], pBlockB[kernel_idx] ); kernel_idx++;
+#if TILE_N_LAST_DIV8 >= 3
+                    DOT_PRODUCT_8( blockC0[2], pblockA00[kernel_y    ], pBlockB[kernel_idx] );
+                    DOT_PRODUCT_8( blockC1[2], pblockA01[kernel_y    ], pBlockB[kernel_idx] ); kernel_idx++;
+                    DOT_PRODUCT_8( blockC0[2], pblockA00[kernel_y + 1], pBlockB[kernel_idx] );
+                    DOT_PRODUCT_8( blockC1[2], pblockA01[kernel_y + 1], pBlockB[kernel_idx] ); kernel_idx++;
+#endif
+#endif
+                } )
+                kernel_y = interleaved_y * 2;
+                if ( kernel_width_is_odd )
+                {
+                    DOT_PRODUCT_8( blockC0[0], pblockA00[kernel_y], pBlockB[kernel_idx] );
+                    DOT_PRODUCT_8( blockC1[0], pblockA01[kernel_y], pBlockB[kernel_idx] ); kernel_idx++;
+#if TILE_N_LAST_DIV8 >= 2
+                    DOT_PRODUCT_8( blockC0[1], pblockA00[kernel_y], pBlockB[kernel_idx] );
+                    DOT_PRODUCT_8( blockC1[1], pblockA01[kernel_y], pBlockB[kernel_idx] ); kernel_idx++;
+#if TILE_N_LAST_DIV8 >= 3
+                    DOT_PRODUCT_8( blockC0[2], pblockA00[kernel_y], pBlockB[kernel_idx] );
+                    DOT_PRODUCT_8( blockC1[2], pblockA01[kernel_y], pBlockB[kernel_idx] ); kernel_idx++;
+#endif
+#endif
+                }
+            }
+
+            //while( ++patch_row < 1 ); //debug
+            while( ++patch_row < KERNEL_HEIGHT );
+#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || DILATION_X != 1 || DILATION_Y != 1
+            curr_y0 = saved_y0;
+            curr_y1 = saved_y1;
+#endif
+            src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
+            src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
+        }
+        //while ( ++patch_depth < 1 );  //debug
+        while ( ++patch_depth < INPUT_DEPTH );
+
+        // Dst resembles a cube of width x height x (output channel * batches).  Each tile writes:
+        // (SIMD * TILE_M) x 1 x TILE_N.  Partial writes are likely to be generated when padding is used.
+        int out0_offset = global_z * out_pitch_z                                                       // batch offset
+         + ( group_x * TILE_N ) * out_pitch_y                                           // channel offset
+         + ( ( global_y * TILE_M + 0 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
+         + ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT;               // x offset
+        int out1_offset = global_z * out_pitch_z                                                       // batch offset
+         + ( group_x * TILE_N ) * out_pitch_y                                           // channel offset
+         + ( ( global_y * TILE_M + 1 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
+         + ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT;               // x offset
+        __global Dtype *out1 = dst + out1_offset;
+
+#if APPLY_BIAS
+        Dtype bias[4];
+        Dtype4 *bias_vec;
+        bias_vec = (Dtype4*)bias;
+        *bias_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)biases_base + group_x * TILE_N));
+#endif
+#ifdef FUSED_CONV_CHANNEL_RELU
+        Dtype slope[4];
+        Dtype4 *slope_vec;
+        slope_vec = (Dtype4*)slope;
+        *slope_vec = as_Dtype4(SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
+        Dtype negative_slope;
+#endif
+        if( global_y * TILE_M < output_width * output_height )
+        {
+            for( int i = 0; i < 8; i++ )
+            {
+                if ( TILE_N_LAST_DIV8 > 0 )
+                {
+
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[0], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out0_offset + ( 0+i) * out_pitch_y, blockC0[0][i] + SUBGROUP_GET_BIAS(0, i));
+                }
+                if ( TILE_N_LAST_DIV8 > 1 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[1], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out0_offset + ( 8+i) * out_pitch_y, blockC0[1][i] + SUBGROUP_GET_BIAS(1, i));
+                }
+                if ( TILE_N_LAST_DIV8 > 2 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[2], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out0_offset + (16+i) * out_pitch_y, blockC0[2][i] + SUBGROUP_GET_BIAS(2, i));
+                }
+                if ( TILE_N_LAST_DIV8 > 3 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[3], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out0_offset + (24+i) * out_pitch_y, blockC0[3][i] + SUBGROUP_GET_BIAS(3, i));
+                }
+            }
+        }
+        if( global_y * TILE_M + 1 < output_width * output_height )
+        {
+            for( int i = 0; i < 8; i++ )
+            {
+                if ( TILE_N_LAST_DIV8 > 0 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[0], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out1_offset + ( 0+i) * out_pitch_y, blockC1[0][i] + SUBGROUP_GET_BIAS(0, i));
+                }
+                if ( TILE_N_LAST_DIV8 > 1 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[1], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out1_offset + ( 8+i) * out_pitch_y, blockC1[1][i] + SUBGROUP_GET_BIAS(1, i));
+                }
+                if ( TILE_N_LAST_DIV8 > 2 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[2], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out1_offset + (16+i) * out_pitch_y, blockC1[2][i] + SUBGROUP_GET_BIAS(2, i));
+                }
+                if ( TILE_N_LAST_DIV8 > 3 )
+                {
+#ifdef FUSED_CONV_CHANNEL_RELU
+                  negative_slope = intel_sub_group_shuffle(slope[3], i);
+#endif
+                  ACTIVATION_FUNCTION(dst, out1_offset + (24+i) * out_pitch_y, blockC1[3][i] + SUBGROUP_GET_BIAS(3, i));
+                }
+            }
+        }
+    }
+#endif
+}
+#endif
+
+#if defined(GEMM_LIKE_CONV_32_2_SIMD16) || defined(GEMM_LIKE_CONV_32_1_SIMD16)
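+// INTERLEAVED_SIMD16_OUTPUT writes the two 16-channel halves of the
+// 32-channel output tile, dropping the second half (or trimming both) in
+// the right-most work-group column when OUT_DEPTH is not a multiple of
+// TILE_N.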
+#ifdef FUSED_CONV_CHANNEL_RELU
+#define INTERLEAVED_SIMD16_OUTPUT(_out_, _offset_,  _m_) do {\
+    if (global_y * TILE_M < output_width * output_height ) \
+    { \
+      if ( ( OUT_DEPTH % TILE_N ) == 0 ) {\
+        for (int i = 0; i < 16; i++) \
+        { \
+          negative_slope = intel_sub_group_shuffle(slope[0], i); \
+          ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
+          negative_slope = intel_sub_group_shuffle(slope[1], i); \
+          ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
+        } \
+      } \
+      else if( ( OUT_DEPTH % 16 ) == 0 ) { \
+        if ( ( global_x + 1 ) < get_global_size(0) ) { \
+          for ( int i = 0; i < 16; i++ ) \
+          { \
+            negative_slope = intel_sub_group_shuffle(slope[0], i); \
+            ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
+            negative_slope = intel_sub_group_shuffle(slope[1], i); \
+            ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
+          } \
+        } \
+        else { \
+          for (int i = 0; i < 16; i++) \
+          { \
+          negative_slope = intel_sub_group_shuffle(slope[0], i); \
+            ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
+          } \
+        } \
+      } \
+      else { \
+        if ( ( global_x + 1 ) < get_global_size(0) ) \
+        { \
+          for ( int i = 0; i < 16; i++ ) \
+          { \
+          negative_slope = intel_sub_group_shuffle(slope[0], i); \
+            ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
+          negative_slope = intel_sub_group_shuffle(slope[1], i); \
+            ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
+          } \
+        } \
+        else { \
+          if ( (OUT_DEPTH % TILE_N) > 16 ) { \
+            for (int i = 0; i < 16 ; i++) \
+            { \
+          negative_slope = intel_sub_group_shuffle(slope[0], i); \
+              ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
+            } \
+            for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
+            { \
+          negative_slope = intel_sub_group_shuffle(slope[1], i); \
+              ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
+            } \
+          } \
+          else { \
+            for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
+            { \
+            negative_slope = intel_sub_group_shuffle(slope[0], i); \
+              ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
+            } \
+          } \
+        } \
+      } \
+    } \
+ }while(0)
+#else
+#define INTERLEAVED_SIMD16_OUTPUT(_out_, _offset_,  _m_) do {\
+    if (global_y * TILE_M < output_width * output_height ) \
+    { \
+      if ( ( OUT_DEPTH % TILE_N ) == 0 ) {\
+        for (int i = 0; i < 16; i++) \
+        { \
+          ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
+          ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
+        } \
+      } \
+      else if( ( OUT_DEPTH % 16 ) == 0 ) { \
+        if ( ( global_x + 1 ) < get_global_size(0) ) { \
+          for ( int i = 0; i < 16; i++ ) \
+          { \
+            ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
+            ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_ [i] + SUBGROUP_GET_BIAS(1, i)); \
+          } \
+        } \
+        else { \
+          for (int i = 0; i < 16; i++) \
+          { \
+            ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_ [i] + SUBGROUP_GET_BIAS(0, i)); \
+          } \
+        } \
+      } \
+      else { \
+        if ( ( global_x + 1 ) < get_global_size(0) ) \
+        { \
+          for ( int i = 0; i < 16; i++ ) \
+          { \
+            ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
+            ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
+          } \
+        } \
+        else { \
+          if ( (OUT_DEPTH % TILE_N) > 16 ) { \
+            for (int i = 0; i < 16 ; i++) \
+            { \
+              ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
+            } \
+            for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
+            { \
+              ACTIVATION_FUNCTION(_out_, _offset_ + (16+i) * out_pitch_y, blockC1 ##_m_[i] + SUBGROUP_GET_BIAS(1, i)); \
+            } \
+          } \
+          else { \
+            for (int i = 0; i < OUT_DEPTH % 16 ; i++) \
+            { \
+              ACTIVATION_FUNCTION(_out_, _offset_ + ( 0+i) * out_pitch_y, blockC0 ##_m_[i] + SUBGROUP_GET_BIAS(0, i)); \
+            } \
+          } \
+        } \
+      } \
+    } \
+ }while(0)
+#endif
+#endif
+
+#ifdef GEMM_LIKE_CONV_32_1_SIMD16
+#define TILE_M          1
+#define TILE_K          KERNEL_WIDTH
+#define TILE_N          32
+
+#ifndef __BEIGNET__
+__attribute__((intel_reqd_sub_group_size(16)))
+#endif
+__kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
+{
+    const int group_x = get_group_id(0);
+    const int group_y = get_group_id(1);
+    const int global_x = get_global_id(0);
+    const int global_y = get_global_id(1);
+    const int global_z = get_global_id(2);
+    int interleaved_y;
+    int kernel_y;
+    int kernel_idx;
+
+    // Result ctile (*dst) is M rows x N columns
+    // LWG size is 1x16.  Thus each thread calculates 16*M rows x N cols of ctile.
+    Dtype16  blockC00 = 0.f;
+    Dtype16  blockC10 = 0.f;
+
+    // Src0 (patch input) is directly used as atile.
+    // Each work item points to the start of a different patch.
+    // atile is M rows x K columns.
+    int curr_x = ( global_y % output_width ) * STRIDE_X;
+    int curr_y = ( global_y / output_width ) * STRIDE_Y;
+#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
+    int saved_y = curr_y;
+#endif
+    const __global Dtype *src0_read = src0
+     + aligned_input_size * global_z                            // batch offset
+     + (curr_y - INPUT_PAD_H) * ROW_PITCH      // y offset
+     + curr_x - INPUT_PAD_W;                 // x offset
+    const __global Dtype *src0_read_orig = src0_read;
+
+    // Src1 (filter) is directly used as btile.
+    // It starts at the top of src1 and walks down.
+    // btile is K rows x N columns.
+    const __global Dtype *src1_read = src1 + ( global_x * TILE_N * 2 );
+
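+// SIMD16 variant of the accumulate step: lane i of colB holds the filter
+// weight for output channel i, so one call updates all 16 channels of the
+// Dtype16 accumulator.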
+#define DOT_PRODUCT_16( _result, _rowA, colB )    \
+    {   \
+        _result.s0 = mad( _rowA, sub_group_broadcast( colB,  0 ), _result.s0 );  \
+        _result.s1 = mad( _rowA, sub_group_broadcast( colB,  1 ), _result.s1 );  \
+        _result.s2 = mad( _rowA, sub_group_broadcast( colB,  2 ), _result.s2 );  \
+        _result.s3 = mad( _rowA, sub_group_broadcast( colB,  3 ), _result.s3 );  \
+        _result.s4 = mad( _rowA, sub_group_broadcast( colB,  4 ), _result.s4 );  \
+        _result.s5 = mad( _rowA, sub_group_broadcast( colB,  5 ), _result.s5 );  \
+        _result.s6 = mad( _rowA, sub_group_broadcast( colB,  6 ), _result.s6 );  \
+        _result.s7 = mad( _rowA, sub_group_broadcast( colB,  7 ), _result.s7 );  \
+        _result.s8 = mad( _rowA, sub_group_broadcast( colB,  8 ), _result.s8 );  \
+        _result.s9 = mad( _rowA, sub_group_broadcast( colB,  9 ), _result.s9 );  \
+        _result.sa = mad( _rowA, sub_group_broadcast( colB, 10 ), _result.sa );  \
+        _result.sb = mad( _rowA, sub_group_broadcast( colB, 11 ), _result.sb );  \
+        _result.sc = mad( _rowA, sub_group_broadcast( colB, 12 ), _result.sc );  \
+        _result.sd = mad( _rowA, sub_group_broadcast( colB, 13 ), _result.sd );  \
+        _result.se = mad( _rowA, sub_group_broadcast( colB, 14 ), _result.se );  \
+        _result.sf = mad( _rowA, sub_group_broadcast( colB, 15 ), _result.sf );  \
+    }
+    typedef CAT( Dtype, KERNEL_WIDTH ) Dtype_t;
+    // Walk DOWN src0 (patch 0, 1, 2, ...) and DOWN src1.
+    // Inner loop loads and FMADs one row (KERNEL_WIDTH) of each input patch
+    // and KERNEL_WIDTH/2 rows of interleaved filter.
+    int patch_depth = 0;
+#ifndef __BEIGNET__
+    __attribute__((opencl_unroll_hint(1)))
+#endif
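+    // opencl_unroll_hint(1) asks the compiler not to unroll the depth loop
+    // (the attribute is skipped on Beignet, which does not support it).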
+    do
+    {
+        int patch_row = 0;
+#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
+        curr_y = saved_y;
+#endif
+#ifndef __BEIGNET__
+        __attribute__((opencl_unroll_hint(1)))
+#endif
+        do
+        {
+            // Load atile and btile.
+            // Kernel data is partially interleaved: every 2 rows are interleaved at Dtype16 granularity.
+            // The exception is that if KERNEL_WIDTH is odd, the last row is not interleaved.  The non-
+            // interleaved row is padded with zero so it has the same size as the interleaved rows.  This
+            // interleaving is done to avoid GRF bank conflicts.  For example, this is how the
+            // kernel data would be arranged before/after interleaving for KERNEL_WIDTH=3.
+            // (0, 0) (16, 0) (32, 0) (48, 0) ...     (0, 0) ( 0, 1) (16, 0) (16, 1) (32, 0) (32, 1) (48, 0) ...
+            // (0, 1) (16, 1) (32, 1) (48, 1) ... =>  (0, 2) (16, 2) (32, 2) (48, 2) ...
+            // (0, 2) (16, 2) (32, 2) (48, 2) ...     ...
+            // ...
+            const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
+
+#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1
+            Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[  0  ];
+            Dtype*  pblockA00 = (Dtype*)(&blockA00);
+#else
+            Dtype_t blockA00;
+            Dtype*  pblockA00 = (Dtype*)(&blockA00);
+            int pos = 0;
+            LOOP(KERNEL_WIDTH, pos,
+            {
+              if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
+                pblockA00[pos] = src0_read[pos * DILATION_X];
+              else
+                pblockA00[pos] = 0;
+            })
+            curr_y += DILATION_Y;
+#endif
+            src0_read += ROW_PITCH * DILATION_Y;
+            INT_TYPE blockB00[KERNEL_WIDTH * 2];
+            INT_TYPE4* p4BlockB00 = (INT_TYPE4*)blockB00;
+            INT_TYPE2* p2BlockB00 = (INT_TYPE2*)blockB00;
+            Dtype* pBlockB00  = (Dtype*)blockB00;
+            interleaved_y = 0;
+            LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+            {
+                p4BlockB00[interleaved_y] = SUB_GROUP_BLOCK_READ4( (const __global INT_TYPE*)src1_read );
+                src1_read += WIDTH1 * 2;
+            } )
+            if ( kernel_width_is_odd )
+            {
+                p2BlockB00[KERNEL_WIDTH - 1] = SUB_GROUP_BLOCK_READ2( (const __global INT_TYPE*)src1_read );
+                src1_read += WIDTH1 * 2;
+            }
+
+            // Perform MADs
+            kernel_idx = 0;
+            interleaved_y = 0;
+            LOOP(KERNEL_WIDTH_DIV2, interleaved_y,
+            {
+                kernel_y = interleaved_y * 2;
+                DOT_PRODUCT_16( blockC00, pblockA00[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_16( blockC00, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_16( blockC10, pblockA00[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_16( blockC10, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+            } )
+            if ( kernel_width_is_odd )
+            {
+                kernel_y = interleaved_y * 2;
+                DOT_PRODUCT_16( blockC00, pblockA00[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_16( blockC10, pblockA00[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+            }
+        }
+
+        //while( ++patch_row < 1 ); //debug
+        while( ++patch_row < KERNEL_HEIGHT );
+
+        src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
+    }
+    //while ( ++patch_depth < 1 );  //debug
+    while ( ++patch_depth < INPUT_DEPTH );
+
+    // Dst resembles a cube of width x height x (output channel * batches).  Each tile writes:
+    // (SIMD * TILE_M) x 1 x TILE_N.  Partial writes are likely to be generated when padding is used.
+    int out_offset = global_z * out_pitch_z                                                   // batch offset
+     + ( group_x * TILE_N ) * out_pitch_y                                       // channel offset
+     + ( ( global_y * TILE_M ) / output_width + OUT_PADDING_HEIGHT) * OUT_PITCH_X  // y offset
+     + ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT;               // x offset
+    __global Dtype *out = dst + out_offset;
+
+#if APPLY_BIAS
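+    // bias[2] spans the 32 output channels of this tile: two bias values per
+    // lane across the 16-wide sub-group.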
+    Dtype bias[2];
+    Dtype2 *bias_vec;
+    bias_vec = (Dtype2*)bias;
+    *bias_vec = as_Dtype2(SUB_GROUP_BLOCK_READ2((__global INT_TYPE *)biases_base + group_x * TILE_N));
+#endif
+#ifdef FUSED_CONV_CHANNEL_RELU
+    Dtype slope[2];
+    Dtype2 *slope_vec;
+    slope_vec = (Dtype2*)slope;
+    *slope_vec = as_Dtype2(SUB_GROUP_BLOCK_READ2((__global INT_TYPE *)negative_slope_base + group_x * TILE_N));
+    Dtype negative_slope;
+#endif
+
+    INTERLEAVED_SIMD16_OUTPUT(dst, out_offset, 0);
+}
+#endif
+#endif // KERNEL_BASIC/IDLF/GEMM_LIKE
diff --git a/modules/dnn/src/opencl/conv_spatial_helper.cl b/modules/dnn/src/opencl/conv_spatial_helper.cl
new file mode 100644 (file)
index 0000000..9d5a89f
--- /dev/null
@@ -0,0 +1,73 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define CONCAT(A,B) A##_##B
+#define TEMPLATE(name,type) CONCAT(name,type)
+#define Dtype float
+
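+// Reorders convolution weights from [filter][channel][y][x] order into a
+// swizzled layout in which groups of swizzleFactor filters are interleaved
+// along the innermost dimension (see the output indexing below).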
+__kernel void TEMPLATE(copyWeightsSwizzled, Dtype)
+    (__global Dtype* weightIn,
+     __global Dtype* weightOut,
+     const int kernel_w,
+     const int kernel_h,
+     const int channels,
+     const int outputs,
+     const int swizzleFactor) {
+
+  unsigned int sX = get_global_id(0);
+
+  // Output layout parameters (computed but not used below).
+  int outputSublayer = channels / swizzleFactor;
+  int outputSublayerIndex = channels % swizzleFactor;
+
+  // Original (input) location, decomposed from the flat global index.
+  int filter = sX / (kernel_w*kernel_h*channels);
+  int kernel_X = sX % kernel_w;
+  int kernel_Y = (sX / kernel_w) % kernel_h;
+  int kernel_C = (sX / (kernel_w * kernel_h)) % channels;
+
+  int FP = filter / swizzleFactor;
+  int F1 = filter % swizzleFactor;
+
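+  // Output layout: [filter / swizzleFactor][channel][kernel_y][kernel_x][filter % swizzleFactor].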
+  weightOut[FP*(kernel_w*kernel_h*channels*swizzleFactor) + kernel_C*(kernel_w*kernel_h*swizzleFactor) + kernel_Y*(kernel_w*swizzleFactor) + kernel_X*swizzleFactor + F1]
+  = weightIn[filter*(kernel_w*kernel_h*channels) + kernel_C*(kernel_w*kernel_h) + kernel_Y*kernel_w + kernel_X];
+}
diff --git a/modules/dnn/src/opencl/dummy.cl b/modules/dnn/src/opencl/dummy.cl
new file mode 100644 (file)
index 0000000..6a55938
--- /dev/null
@@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
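+// Intentionally empty placeholder kernel.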
+__kernel void dummy_kernel()
+{
+}
diff --git a/modules/dnn/src/opencl/gemm_image.cl b/modules/dnn/src/opencl/gemm_image.cl
new file mode 100644 (file)
index 0000000..37ae523
--- /dev/null
@@ -0,0 +1,635 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define CONCAT(A,B) A##_##B
+#define TEMPLATE(name,type) CONCAT(name,type)
+
+// Types used for parameters, offset computations and so on
+#define int_tp int
+#define uint_tp unsigned int
+
+#define Dtype  float
+#define Dtype2 float2
+#define Dtype4 float4
+#define Dtype8 float8
+
+#define as_Dtype  as_float
+#define as_Dtype2 as_float2
+#define as_Dtype4 as_float4
+#define as_Dtype8 as_float8
+
+#define KERNEL_ARG_DTYPE float
+
+#if defined(cl_intel_subgroups)
+#pragma OPENCL EXTENSION  cl_intel_subgroups : enable
+#endif
+
+#define TILE_M          32
+#define TILE_K          8
+
+// Common block to calculate (alpha * AxB + beta * C) and write the result to the destination image.
+
+#define SUBGROUP_BLOCK_READ8( __image, __coord ) intel_sub_group_block_read8( __image, __coord )
+#define SHUFFLE_TYPE2(val) val
+#define SHUFFLE_TYPE8(val) val
+#define READ_IMAGE(__image, __coord) read_imagef(__image, sampler, __coord)
+#define SIZE_OF_ELEMENT sizeof(uint)
+#define SIMD_SIZE_GEMM 8
+#define TILE_N 8
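+
+// Tiling sketch (assumed host dispatch): each 8-lane subgroup owns a
+// TILE_M x TILE_N = 32x8 block of C, kept as four stacked Dtype8 accumulators
+// per lane, and steps through the K dimension TILE_K columns at a time.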
+
+//#define USE_IMAGE_C
+#ifdef USE_IMAGE_C
+#define BLOCKC_READ8( _C, _coordC ) as_Dtype8( intel_sub_group_block_read8( _C, _coordC ) )
+#define BLOCKC_WRITE8( _C, _coordC, _val ) intel_sub_group_block_write8( _C, _coordC, as_uint8( _val ) )
+#define MATC_PARAMETER __read_only image2d_t C, __write_only image2d_t dst
+#define GEMM_OUTPUT(ALPHA1, BETA_NOT0) GEMM_OUTPUT_EXT(ALPHA1, BETA_NOT0, C, dst, sizeof(uint))
+#else
+#define BLOCKC_READ8( _C, _coordC ) \
+          (Dtype8) ( (_coordC.x + get_local_id(0) < N && _coordC.y < M) ? _C[ _coordC.y * ldc + _coordC.x + get_local_id(0) ] : 0, \
+                     (_coordC.x + get_local_id(0) < N && _coordC.y + 1 < M) ? _C[ ( _coordC.y + 1 ) * ldc + _coordC.x + get_local_id(0) ] : 0, \
+                     (_coordC.x + get_local_id(0) < N && _coordC.y + 2 < M) ? _C[ ( _coordC.y + 2 ) * ldc + _coordC.x + get_local_id(0) ] : 0, \
+                     (_coordC.x + get_local_id(0) < N && _coordC.y + 3 < M) ? _C[ ( _coordC.y + 3 ) * ldc + _coordC.x + get_local_id(0) ] : 0, \
+                     (_coordC.x + get_local_id(0) < N && _coordC.y + 4 < M) ? _C[ ( _coordC.y + 4 ) * ldc + _coordC.x + get_local_id(0) ] : 0, \
+                     (_coordC.x + get_local_id(0) < N && _coordC.y + 5 < M) ? _C[ ( _coordC.y + 5 ) * ldc + _coordC.x + get_local_id(0) ] : 0, \
+                     (_coordC.x + get_local_id(0) < N && _coordC.y + 6 < M) ? _C[ ( _coordC.y + 6 ) * ldc + _coordC.x + get_local_id(0) ] : 0, \
+                     (_coordC.x + get_local_id(0) < N && _coordC.y + 7 < M) ? _C[ ( _coordC.y + 7 ) * ldc + _coordC.x + get_local_id(0) ] : 0)
+
+#define BLOCKC_WRITE8( _C, _coordC, _val) do {\
+                     if (_coordC.x + get_local_id(0) < N) { \
+                       if (_coordC.y < M) \
+                         _C[ _coordC.y * ldc + _coordC.x + get_local_id(0) ] = _val.s0; \
+                       if (_coordC.y + 1 < M) \
+                         _C[ ( _coordC.y + 1 )* ldc + _coordC.x + get_local_id(0) ] = _val.s1; \
+                       if (_coordC.y + 2 < M) \
+                         _C[ ( _coordC.y + 2 )* ldc + _coordC.x + get_local_id(0) ] = _val.s2; \
+                       if (_coordC.y + 3 < M) \
+                         _C[ ( _coordC.y + 3 )* ldc + _coordC.x + get_local_id(0) ] = _val.s3; \
+                       if (_coordC.y + 4 < M) \
+                         _C[ ( _coordC.y + 4 )* ldc + _coordC.x + get_local_id(0) ] = _val.s4; \
+                       if (_coordC.y + 5 < M) \
+                         _C[ ( _coordC.y + 5 )* ldc + _coordC.x + get_local_id(0) ] = _val.s5; \
+                       if (_coordC.y + 6 < M) \
+                         _C[ ( _coordC.y + 6 )* ldc + _coordC.x + get_local_id(0) ] = _val.s6; \
+                       if (_coordC.y + 7 < M) \
+                         _C[ ( _coordC.y + 7 )* ldc + _coordC.x + get_local_id(0) ] = _val.s7; \
+                     }} while(0)
+#define MATC_PARAMETER __global Dtype * C, const int offC, const int M, const int N, const int ldc
+#define GEMM_OUTPUT(ALPHA1, BETA_NOT0) GEMM_OUTPUT_EXT(ALPHA1, BETA_NOT0, (C + offC), (C + offC), 1)
+#endif
+
+#define GEMM_OUTPUT_EXT(ALPHA1, BETA_NOT0, _C, _dst, _C_step) \
+    int2    coordDst = (int2)( ( group_x * TILE_N ) * _C_step, ( group_y * TILE_M ) ); \
+    int2    coordC = coordDst; \
+    Dtype8 blockC00; \
+    Dtype8 blockC01; \
+    Dtype8 blockC02; \
+    Dtype8 blockC03; \
+    if (BETA_NOT0) { \
+        blockC00 = isFirstColBlock ? BLOCKC_READ8( _C, coordC ) * beta : BLOCKC_READ8( _C, coordC );    coordC.y += 8; \
+        blockC01 = isFirstColBlock ? BLOCKC_READ8( _C, coordC ) * beta : BLOCKC_READ8( _C, coordC );    coordC.y += 8; \
+        blockC02 = isFirstColBlock ? BLOCKC_READ8( _C, coordC ) * beta : BLOCKC_READ8( _C, coordC );    coordC.y += 8; \
+        blockC03 = isFirstColBlock ? BLOCKC_READ8( _C, coordC ) * beta : BLOCKC_READ8( _C, coordC ); \
+        if (!ALPHA1) { \
+            blockC00 = mad(blockAxB00, (Dtype8)alpha, blockC00); \
+            blockC01 = mad(blockAxB01, (Dtype8)alpha, blockC01); \
+            blockC02 = mad(blockAxB02, (Dtype8)alpha, blockC02); \
+            blockC03 = mad(blockAxB03, (Dtype8)alpha, blockC03); \
+        } else { \
+            blockC00 += blockAxB00; \
+            blockC01 += blockAxB01; \
+            blockC02 += blockAxB02; \
+            blockC03 += blockAxB03; \
+        } \
+    } else { \
+        blockC00 = isFirstColBlock ? BLOCKC_READ8( _C, coordC ) * beta : BLOCKC_READ8( _C, coordC );    coordC.y += 8; \
+        blockC01 = isFirstColBlock ? BLOCKC_READ8( _C, coordC ) * beta : BLOCKC_READ8( _C, coordC );    coordC.y += 8; \
+        blockC02 = isFirstColBlock ? BLOCKC_READ8( _C, coordC ) * beta : BLOCKC_READ8( _C, coordC );    coordC.y += 8; \
+        blockC03 = isFirstColBlock ? BLOCKC_READ8( _C, coordC ) * beta : BLOCKC_READ8( _C, coordC ); \
+        if (!ALPHA1) { \
+          blockC00 = mad(blockAxB00, (Dtype8)alpha, blockC00); \
+          blockC01 = mad(blockAxB01, (Dtype8)alpha, blockC01); \
+          blockC02 = mad(blockAxB02, (Dtype8)alpha, blockC02); \
+          blockC03 = mad(blockAxB03, (Dtype8)alpha, blockC03); \
+        } else { \
+          blockC00 += blockAxB00; \
+          blockC01 += blockAxB01; \
+          blockC02 += blockAxB02; \
+          blockC03 += blockAxB03; \
+        } \
+    } \
+    BLOCKC_WRITE8( _dst, coordDst, blockC00 );    coordDst.y += 8; \
+    BLOCKC_WRITE8( _dst, coordDst, blockC01 );    coordDst.y += 8; \
+    BLOCKC_WRITE8( _dst, coordDst, blockC02 );    coordDst.y += 8; \
+    BLOCKC_WRITE8( _dst, coordDst, blockC03 );
+
+// Get the specified column of the block.
+#define TRANSPOSE_BLOCK_8( _block, _col )   \
+        (Dtype8)( intel_sub_group_shuffle( _block.s0, _col ),   \
+                  intel_sub_group_shuffle( _block.s1, _col ),   \
+                  intel_sub_group_shuffle( _block.s2, _col ),   \
+                  intel_sub_group_shuffle( _block.s3, _col ),   \
+                  intel_sub_group_shuffle( _block.s4, _col ),   \
+                  intel_sub_group_shuffle( _block.s5, _col ),   \
+                  intel_sub_group_shuffle( _block.s6, _col ),   \
+                  intel_sub_group_shuffle( _block.s7, _col ) );
+
+// Multiply A's column block by B's row block.
+#define MULTIPLY_BLOCKS_8x8( _result, _blockA, _blockB )    \
+        {   \
+            const Dtype8    acol0 = TRANSPOSE_BLOCK_8( _blockA, 0 );    \
+            const Dtype8    acol1 = TRANSPOSE_BLOCK_8( _blockA, 1 );    \
+            const Dtype8    acol2 = TRANSPOSE_BLOCK_8( _blockA, 2 );    \
+            const Dtype8    acol3 = TRANSPOSE_BLOCK_8( _blockA, 3 );    \
+            const Dtype8    acol4 = TRANSPOSE_BLOCK_8( _blockA, 4 );    \
+            const Dtype8    acol5 = TRANSPOSE_BLOCK_8( _blockA, 5 );    \
+            const Dtype8    acol6 = TRANSPOSE_BLOCK_8( _blockA, 6 );    \
+            const Dtype8    acol7 = TRANSPOSE_BLOCK_8( _blockA, 7 );    \
+            _result = mad( (Dtype8)(_blockB.s0), acol0, _result );      \
+            _result = mad( (Dtype8)(_blockB.s1), acol1, _result );      \
+            _result = mad( (Dtype8)(_blockB.s2), acol2, _result );      \
+            _result = mad( (Dtype8)(_blockB.s3), acol3, _result );      \
+            _result = mad( (Dtype8)(_blockB.s4), acol4, _result );      \
+            _result = mad( (Dtype8)(_blockB.s5), acol5, _result );      \
+            _result = mad( (Dtype8)(_blockB.s6), acol6, _result );      \
+            _result = mad( (Dtype8)(_blockB.s7), acol7, _result );      \
+        }
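+
+// In MULTIPLY_BLOCKS_8x8 each lane carries one column of the 8x8 B tile;
+// TRANSPOSE_BLOCK_8 gathers column _col of the A tile from lane _col via
+// subgroup shuffles, so the eight mad() calls accumulate the 8x8 product as
+// eight rank-1 updates without touching local memory.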
+
+#define GEMM_NN(ALPHA1, BETA_NOT0) \
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE_GEMM))) \
+__attribute__((reqd_work_group_size(SIMD_SIZE_GEMM, 1, 1))) \
+__kernel void TEMPLATE(gemm_32_1_NN_ ##ALPHA1 ##_ ##BETA_NOT0, Dtype)( \
+    __read_only image2d_t A, \
+    __read_only image2d_t B, \
+    MATC_PARAMETER, \
+    KERNEL_ARG_DTYPE alpha_in, \
+    KERNEL_ARG_DTYPE beta_in, \
+    int width0, \
+    int isFirstColBlock) \
+{ \
+    const Dtype alpha = (Dtype)alpha_in; \
+    const Dtype beta = (Dtype)beta_in; \
+    const int group_x = get_group_id(0); \
+    const int group_y = get_group_id(1); \
+    Dtype8 blockAxB00 = 0.0f; \
+    Dtype8 blockAxB01 = 0.0f; \
+    Dtype8 blockAxB02 = 0.0f; \
+    Dtype8 blockAxB03 = 0.0f; \
+    int2    coordA = (int2)( 0, group_y * TILE_M ); \
+    int2    coordB = (int2)( ( group_x * TILE_N ) * SIZE_OF_ELEMENT, 0 ); \
+    do \
+    {  \
+        int2    coordBTemp = coordB; \
+        Dtype8  blockB00 = as_Dtype8( SUBGROUP_BLOCK_READ8( B, coordBTemp ) );    coordB.y += TILE_K; \
+        int2    coordATemp = coordA; \
+        Dtype8  blockA00 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.y += 8; \
+        Dtype8  blockA01 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.y += 8; \
+        Dtype8  blockA02 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.y += 8; \
+        Dtype8  blockA03 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordA.x += TILE_K * SIZE_OF_ELEMENT; \
+        MULTIPLY_BLOCKS_8x8( blockAxB00, blockA00, blockB00 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB01, blockA01, blockB00 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB02, blockA02, blockB00 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB03, blockA03, blockB00 ); \
+    } \
+    while( coordB.y < width0 ); \
+    GEMM_OUTPUT(ALPHA1, BETA_NOT0); \
+}
+
+GEMM_NN(1, 0) // ALPHA == 1, BETA == 0
+GEMM_NN(1, 1) // ALPHA == 1, BETA != 0
+GEMM_NN(0, 0) // ALPHA != 1, BETA == 0
+GEMM_NN(0, 1) // ALPHA != 1, BETA != 0
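+
+// Passing literal 0/1 lets the compiler fold the alpha/beta branches in
+// GEMM_OUTPUT away, so every instantiation gets a branch-free epilogue.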
+
+#undef TRANSPOSE_BLOCK_8
+#undef MULTIPLY_BLOCKS_8x8
+#undef GEMM_NN
+
+// Gather eight consecutive lanes (_col.._col+7) of a per-lane scalar into a Dtype8 column block.
+#define TRANSPOSE_BLOCK_8(_vec, _col) \
+        (Dtype8)( intel_sub_group_shuffle(_vec, _col + 0), \
+                  intel_sub_group_shuffle(_vec, _col + 1), \
+                  intel_sub_group_shuffle(_vec, _col + 2), \
+                  intel_sub_group_shuffle(_vec, _col + 3), \
+                  intel_sub_group_shuffle(_vec, _col + 4), \
+                  intel_sub_group_shuffle(_vec, _col + 5), \
+                  intel_sub_group_shuffle(_vec, _col + 6), \
+                  intel_sub_group_shuffle(_vec, _col + 7) )
+
+#define MULTIPLY_BLOCKS_8x8( _result, _blockA, _blockB, _col )    \
+        {   \
+            _result = mad( (Dtype8)(_blockB.s0), TRANSPOSE_BLOCK_8(_blockA.s0, _col), _result );      \
+            _result = mad( (Dtype8)(_blockB.s1), TRANSPOSE_BLOCK_8(_blockA.s1, _col), _result );      \
+            _result = mad( (Dtype8)(_blockB.s2), TRANSPOSE_BLOCK_8(_blockA.s2, _col), _result );      \
+            _result = mad( (Dtype8)(_blockB.s3), TRANSPOSE_BLOCK_8(_blockA.s3, _col), _result );      \
+            _result = mad( (Dtype8)(_blockB.s4), TRANSPOSE_BLOCK_8(_blockA.s4, _col), _result );      \
+            _result = mad( (Dtype8)(_blockB.s5), TRANSPOSE_BLOCK_8(_blockA.s5, _col), _result );      \
+            _result = mad( (Dtype8)(_blockB.s6), TRANSPOSE_BLOCK_8(_blockA.s6, _col), _result );      \
+            _result = mad( (Dtype8)(_blockB.s7), TRANSPOSE_BLOCK_8(_blockA.s7, _col), _result );      \
+        }
+
+#define GEMM_TN(ALPHA1, BETA_NOT0) \
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE_GEMM))) \
+__attribute__((reqd_work_group_size(SIMD_SIZE_GEMM, 1, 1))) \
+__kernel void TEMPLATE(gemm_32_1_TN_ ##ALPHA1 ##_ ##BETA_NOT0,Dtype)( \
+    __read_only image2d_t A, \
+    __read_only image2d_t B, \
+    MATC_PARAMETER, \
+    KERNEL_ARG_DTYPE alpha_in, \
+    KERNEL_ARG_DTYPE beta_in, \
+    int width0, \
+    int isFirstColBlock) \
+{ \
+    const Dtype alpha = (Dtype)alpha_in; \
+    const Dtype beta = (Dtype)beta_in; \
+    const int group_x = get_group_id(0);\
+    const int group_y = get_group_id(1);\
+    Dtype8 blockAxB00 = 0.0f;\
+    Dtype8 blockAxB01 = 0.0f;\
+    Dtype8 blockAxB02 = 0.0f;\
+    Dtype8 blockAxB03 = 0.0f;\
+    int2    coordA = (int2)( group_y * TILE_M * SIZE_OF_ELEMENT, 0 );\
+    int2    coordB = (int2)( ( group_x * TILE_N ) * SIZE_OF_ELEMENT, 0 );\
+    do\
+    {\
+        int2    coordBTemp = coordB;\
+        Dtype8 blockB00 = as_Dtype8( SUBGROUP_BLOCK_READ8( B, coordBTemp ) );    coordB.y += TILE_K;\
+        int2    coordATemp = coordA;\
+        Dtype8 blockA00 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.x += 8 * SIZE_OF_ELEMENT;\
+        Dtype8 blockA01 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.x += 8 * SIZE_OF_ELEMENT;\
+        Dtype8 blockA02 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.x += 8 * SIZE_OF_ELEMENT;\
+        Dtype8 blockA03 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordA.y += TILE_K;\
+        MULTIPLY_BLOCKS_8x8( blockAxB00, blockA00, blockB00, 0 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB01, blockA01, blockB00, 0 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB02, blockA02, blockB00, 0 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB03, blockA03, blockB00, 0 ); \
+    } \
+    while( coordB.y < width0 ); \
+    GEMM_OUTPUT(ALPHA1, BETA_NOT0); \
+}
+
+GEMM_TN(1, 0) // ALPHA == 1, BETA == 0
+GEMM_TN(1, 1) // ALPHA == 1, BETA != 0
+GEMM_TN(0, 0) // ALPHA != 1, BETA == 0
+GEMM_TN(0, 1) // ALPHA != 1, BETA != 0
+
+#undef MULTIPLY_BLOCKS_8x8
+#undef TRANSPOSE_BLOCK_8
+#undef GEMM_TN
+
+// The same as GEMM_NN
+#define TRANSPOSE_BLOCK_8( _block, _col )   \
+        (Dtype8)( intel_sub_group_shuffle( _block.s0, _col),   \
+                  intel_sub_group_shuffle( _block.s1, _col),   \
+                  intel_sub_group_shuffle( _block.s2, _col),   \
+                  intel_sub_group_shuffle( _block.s3, _col),   \
+                  intel_sub_group_shuffle( _block.s4, _col),   \
+                  intel_sub_group_shuffle( _block.s5, _col),   \
+                  intel_sub_group_shuffle( _block.s6, _col),   \
+                  intel_sub_group_shuffle( _block.s7, _col) )
+
+#define MULTIPLY_BLOCKS_8x8( _result, _blockA, _blockB )    \
+        {   \
+            const Dtype8    acol0 = TRANSPOSE_BLOCK_8( _blockA, 0 );    \
+            const Dtype8    acol1 = TRANSPOSE_BLOCK_8( _blockA, 1 );    \
+            const Dtype8    acol2 = TRANSPOSE_BLOCK_8( _blockA, 2 );    \
+            const Dtype8    acol3 = TRANSPOSE_BLOCK_8( _blockA, 3 );    \
+            const Dtype8    acol4 = TRANSPOSE_BLOCK_8( _blockA, 4 );    \
+            const Dtype8    acol5 = TRANSPOSE_BLOCK_8( _blockA, 5 );    \
+            const Dtype8    acol6 = TRANSPOSE_BLOCK_8( _blockA, 6 );    \
+            const Dtype8    acol7 = TRANSPOSE_BLOCK_8( _blockA, 7 );    \
+            _result = mad( (Dtype8)_blockB.s0, acol0, _result );      \
+            _result = mad( (Dtype8)_blockB.s1, acol1, _result );      \
+            _result = mad( (Dtype8)_blockB.s2, acol2, _result );      \
+            _result = mad( (Dtype8)_blockB.s3, acol3, _result );      \
+            _result = mad( (Dtype8)_blockB.s4, acol4, _result );      \
+            _result = mad( (Dtype8)_blockB.s5, acol5, _result );      \
+            _result = mad( (Dtype8)_blockB.s6, acol6, _result );      \
+            _result = mad( (Dtype8)_blockB.s7, acol7, _result );      \
+        }
+
+#define GEMM_NT(ALPHA1, BETA_NOT0, VECSCALAR, VECSIZE) \
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE_GEMM))) \
+__attribute__((reqd_work_group_size(SIMD_SIZE_GEMM, 1, 1))) \
+__kernel void TEMPLATE(gemm_32_1_NT_ ##VECSCALAR ##_ ##ALPHA1 ##_ ##BETA_NOT0,Dtype)( \
+    __read_only image2d_t A, \
+    MATB_PARAMETER, \
+    MATC_PARAMETER, \
+    KERNEL_ARG_DTYPE alpha_in, \
+    KERNEL_ARG_DTYPE beta_in, \
+    int padded_k, \
+    int k, \
+    int isFirstColBlock) \
+{ \
+    const Dtype alpha = (Dtype)alpha_in; \
+    const Dtype beta = (Dtype)beta_in; \
+    const int group_x = get_group_id(0); \
+    const int group_y = get_group_id(1); \
+    Dtype8 blockAxB00 = 0.0f; \
+    Dtype8 blockAxB01 = 0.0f; \
+    Dtype8 blockAxB02 = 0.0f; \
+    Dtype8 blockAxB03 = 0.0f; \
+    int2    coordA = (int2)( 0, group_y * TILE_M ); \
+    int2    coordB = (int2)( 0, ( group_x * TILE_N )); \
+    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; \
+    do \
+    { \
+        Dtype8 blockB00;  \
+        BLOCKB_READ8(blockB00, B, coordB); \
+        int2    coordATemp = coordA; \
+        Dtype8 blockA00 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.y += 8; \
+        Dtype8 blockA01 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.y += 8; \
+        Dtype8 blockA02 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.y += 8; \
+        Dtype8 blockA03 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordA.x += TILE_K * SIZE_OF_ELEMENT; \
+        MULTIPLY_BLOCKS_8x8( blockAxB00, blockA00, blockB00 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB01, blockA01, blockB00 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB02, blockA02, blockB00 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB03, blockA03, blockB00 ); \
+    } \
+    while( coordB.x < padded_k / VECSIZE ); \
+    GEMM_OUTPUT(ALPHA1, BETA_NOT0); \
+}
+
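+// Three BLOCKB_READ8 flavors follow, each instantiated as separate kernels:
+// VEC4 reads B as float4 texels, BUFFER does vload8 from a raw global pointer,
+// and SCALAR reads one float per texel. The host side is expected to launch
+// whichever variant matches how B was packed.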
+#define BLOCKB_READ8(_blockb, _B, _coordB) \
+        int2 _coordBTemp = _coordB; \
+        _coordBTemp.y += get_local_id(0); \
+        _blockb.s0123 = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s4567 = READ_IMAGE(_B, _coordBTemp); _coordB.x += 2;
+
+#define MATB_PARAMETER __read_only image2d_t B
+
+GEMM_NT(1, 0, VEC4, 4) // ALPHA == 1, BETA == 0
+GEMM_NT(1, 1, VEC4, 4) // ALPHA == 1, BETA != 0
+GEMM_NT(0, 0, VEC4, 4) // ALPHA != 1, BETA == 0
+GEMM_NT(0, 1, VEC4, 4) // ALPHA != 1, BETA != 0
+#undef BLOCKB_READ8
+#undef MATB_PARAMETER
+
+#define BLOCKB_READ8(_blockb, _B, _coordB) \
+        int2 _coordBTemp = _coordB; \
+        _coordBTemp.y += get_local_id(0); \
+        const __global Dtype *B_read = (__global Dtype *)(_B + (_coordBTemp.y * ldb) + _coordBTemp.x + offB); \
+        _blockb = vload8(0, B_read); \
+        _coordB.x += TILE_K;
+
+#define MATB_PARAMETER __global Dtype *B, int offB, int ldb
+
+GEMM_NT(1, 0, BUFFER, 1) // ALPHA == 1, BETA == 0
+GEMM_NT(1, 1, BUFFER, 1) // ALPHA == 1, BETA != 0
+GEMM_NT(0, 0, BUFFER, 1) // ALPHA != 1, BETA == 0
+GEMM_NT(0, 1, BUFFER, 1) // ALPHA != 1, BETA != 0
+#undef BLOCKB_READ8
+#undef MATB_PARAMETER
+
+#define BLOCKB_READ8(_blockb, _B, _coordB) \
+        int2 _coordBTemp = _coordB; \
+        _coordBTemp.y += get_local_id(0); \
+        Dtype4 temp; \
+        temp = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s0 = temp.s0; \
+        temp = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s1 = temp.s0; \
+        temp = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s2 = temp.s0; \
+        temp = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s3 = temp.s0; \
+        temp = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s4 = temp.s0; \
+        temp = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s5 = temp.s0; \
+        temp = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s6 = temp.s0; \
+        temp = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s7 = temp.s0; \
+        _coordB.x += 8;
+
+#define MATB_PARAMETER __read_only image2d_t B
+
+GEMM_NT(1, 0, SCALAR, 1) // ALPHA == 1, BETA == 0
+GEMM_NT(1, 1, SCALAR, 1) // ALPHA == 1, BETA != 0
+GEMM_NT(0, 0, SCALAR, 1) // ALPHA != 1, BETA == 0
+GEMM_NT(0, 1, SCALAR, 1) // ALPHA != 1, BETA != 0
+#undef BLOCKB_READ8
+#undef MATB_PARAMETER
+
+#undef MULTIPLY_BLOCKS_8x8
+#undef TRANSPOSE_BLOCK_8
+#undef GEMM_NT
+
+// The same as in GEMM_TN.
+#define TRANSPOSE_BLOCK_8(_vec, _col) \
+        (Dtype8)( intel_sub_group_shuffle(_vec, _col + 0), \
+                  intel_sub_group_shuffle(_vec, _col + 1), \
+                  intel_sub_group_shuffle(_vec, _col + 2), \
+                  intel_sub_group_shuffle(_vec, _col + 3), \
+                  intel_sub_group_shuffle(_vec, _col + 4), \
+                  intel_sub_group_shuffle(_vec, _col + 5), \
+                  intel_sub_group_shuffle(_vec, _col + 6), \
+                  intel_sub_group_shuffle(_vec, _col + 7) );
+
+#define MULTIPLY_BLOCKS_8x8( _result, _blockA, _blockB, _col )    \
+        {   \
+            const Dtype8    acol0 = TRANSPOSE_BLOCK_8( _blockA.s0, _col );    \
+            const Dtype8    acol1 = TRANSPOSE_BLOCK_8( _blockA.s1, _col );    \
+            const Dtype8    acol2 = TRANSPOSE_BLOCK_8( _blockA.s2, _col );    \
+            const Dtype8    acol3 = TRANSPOSE_BLOCK_8( _blockA.s3, _col );    \
+            const Dtype8    acol4 = TRANSPOSE_BLOCK_8( _blockA.s4, _col );    \
+            const Dtype8    acol5 = TRANSPOSE_BLOCK_8( _blockA.s5, _col );    \
+            const Dtype8    acol6 = TRANSPOSE_BLOCK_8( _blockA.s6, _col );    \
+            const Dtype8    acol7 = TRANSPOSE_BLOCK_8( _blockA.s7, _col );    \
+            _result = mad( (Dtype8)_blockB.s0, acol0, _result );      \
+            _result = mad( (Dtype8)_blockB.s1, acol1, _result );      \
+            _result = mad( (Dtype8)_blockB.s2, acol2, _result );      \
+            _result = mad( (Dtype8)_blockB.s3, acol3, _result );      \
+            _result = mad( (Dtype8)_blockB.s4, acol4, _result );      \
+            _result = mad( (Dtype8)_blockB.s5, acol5, _result );      \
+            _result = mad( (Dtype8)_blockB.s6, acol6, _result );      \
+            _result = mad( (Dtype8)_blockB.s7, acol7, _result );      \
+        }
+
+#define GEMM_TT(ALPHA1, BETA_NOT0, VECSCALAR, VECSIZE) \
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE_GEMM))) \
+__attribute__((reqd_work_group_size(SIMD_SIZE_GEMM, 1, 1))) \
+__kernel void TEMPLATE(gemm_32_1_TT_ ##VECSCALAR ##_ ##ALPHA1 ##_ ##BETA_NOT0, Dtype)( \
+    __read_only image2d_t A, \
+    MATB_PARAMETER, \
+    MATC_PARAMETER, \
+    KERNEL_ARG_DTYPE alpha_in, \
+    KERNEL_ARG_DTYPE beta_in, \
+    int padded_k, \
+    int k, \
+    int isFirstColBlock) \
+{ \
+    const Dtype alpha = (Dtype)alpha_in; \
+    const Dtype beta = (Dtype)beta_in; \
+    const int group_x = get_group_id(0); \
+    const int group_y = get_group_id(1); \
+    Dtype8 blockAxB00 = 0.0f; \
+    Dtype8 blockAxB01 = 0.0f; \
+    Dtype8 blockAxB02 = 0.0f; \
+    Dtype8 blockAxB03 = 0.0f; \
+    int2    coordA = (int2)( group_y * TILE_M * SIZE_OF_ELEMENT, 0 ); \
+    int2    coordB = (int2)( 0, ( group_x * TILE_N )); \
+    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; \
+    do \
+    { \
+        Dtype8 blockB00;             \
+        BLOCKB_READ8(blockB00, B, coordB); \
+        int2    coordATemp = coordA; \
+        Dtype8 blockA00 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.x += 8 * SIZE_OF_ELEMENT; \
+        Dtype8 blockA01 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.x += 8 * SIZE_OF_ELEMENT; \
+        Dtype8 blockA02 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordATemp.x += 8 * SIZE_OF_ELEMENT; \
+        Dtype8 blockA03 = as_Dtype8( SUBGROUP_BLOCK_READ8( A, coordATemp ) );    coordA.y += TILE_K; \
+        MULTIPLY_BLOCKS_8x8( blockAxB00, blockA00 , blockB00, 0 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB01, blockA01 , blockB00, 0 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB02, blockA02 , blockB00, 0 ); \
+        MULTIPLY_BLOCKS_8x8( blockAxB03, blockA03 , blockB00, 0 ); \
+    } \
+    while( coordB.x < padded_k / VECSIZE ); \
+    GEMM_OUTPUT(ALPHA1, BETA_NOT0);\
+}
+
+#define BLOCKB_READ8(_blockb, _B, _coordB) \
+        int2 _coordBTemp = _coordB; \
+        _coordBTemp.y += get_local_id(0); \
+        _blockb.s0123 = READ_IMAGE(_B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s4567 = READ_IMAGE(_B, _coordBTemp); _coordB.x += 2;
+
+#define MATB_PARAMETER __read_only image2d_t B
+
+GEMM_TT(1, 0, VEC4, 4) // ALPHA == 1, BETA == 0
+GEMM_TT(1, 1, VEC4, 4) // ALPHA == 1, BETA != 0
+GEMM_TT(0, 0, VEC4, 4) // ALPHA != 1, BETA == 0
+GEMM_TT(0, 1, VEC4, 4) // ALPHA != 1, BETA != 0
+#undef BLOCKB_READ8
+#undef MATB_PARAMETER
+
+#define BLOCKB_READ8(_blockb, _B, _coordB) \
+        int2 _coordBTemp = _coordB; \
+        _coordBTemp.y += get_local_id(0); \
+        const __global Dtype *B_read = (__global Dtype *)(_B + (_coordBTemp.y * k) + _coordBTemp.x + offB); \
+        _blockb = vload8(0, B_read); \
+        _coordB.x += TILE_K;
+
+#define MATB_PARAMETER __global Dtype *B, int offB, int ldb
+
+GEMM_TT(1, 0, BUFFER, 1) // ALPHA == 1, BETA == 0
+GEMM_TT(1, 1, BUFFER, 1) // ALPHA == 1, BETA != 0
+GEMM_TT(0, 0, BUFFER, 1) // ALPHA != 1, BETA == 0
+GEMM_TT(0, 1, BUFFER, 1) // ALPHA != 1, BETA != 0
+#undef BLOCKB_READ8
+#undef MATB_PARAMETER
+
+#define BLOCKB_READ8(_blockb, _B, _coordB) \
+        int2 _coordBTemp = _coordB; \
+        _coordBTemp.y += get_local_id(0); \
+        Dtype4 temp; \
+        temp = READ_IMAGE(B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s0 = temp.s0; \
+        temp = READ_IMAGE(B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s1 = temp.s0; \
+        temp = READ_IMAGE(B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s2 = temp.s0; \
+        temp = READ_IMAGE(B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s3 = temp.s0; \
+        temp = READ_IMAGE(B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s4 = temp.s0; \
+        temp = READ_IMAGE(B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s5 = temp.s0; \
+        temp = READ_IMAGE(B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s6 = temp.s0; \
+        temp = READ_IMAGE(B, _coordBTemp); _coordBTemp.x += 1; \
+        _blockb.s7 = temp.s0; \
+        _coordB.x += 8;
+
+#define MATB_PARAMETER __read_only image2d_t B
+
+GEMM_TT(1, 0, SCALAR, 1) // ALPHA == 1, BETA == 0
+GEMM_TT(1, 1, SCALAR, 1) // ALPHA == 1, BETA != 0
+GEMM_TT(0, 0, SCALAR, 1) // ALPHA != 1, BETA == 0
+GEMM_TT(0, 1, SCALAR, 1) // ALPHA != 1, BETA != 0
+#undef BLOCKB_READ8
+#undef MATB_PARAMETER
+
+#undef MULTIPLY_BLOCKS_8x8
+#undef TRANSPOSE_BLOCK_8
+#undef GEMM_TT
+
+#undef TILE_M
+#undef TILE_K
+#undef TILE_N
+#undef SUBGROUP_BLOCK_READ8
+#undef READ_IMAGE
+#undef SIZE_OF_ELEMENT
+
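+// Staging helpers that copy a GEMM operand from a buffer into an image2d_t.
+// The 'transpose' variant writes plain float texels and assumes an exact-size
+// dispatch (no bounds check); the 'no_transpose' variant reinterprets each
+// float's four bytes as a uint4 texel and zero-fills out-of-range pixels.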
+__kernel void TEMPLATE(gemm_buffer_copy_image_transpose,Dtype)(
+    __global Dtype* A,
+    __write_only image2d_t ImA,
+    int offA,
+    int width,
+    int height,
+    int ldA)
+{
+    const int gidx = get_global_id(0);
+    const int gidy = get_global_id(1);
+    int2 coord_dst = (int2)(gidx, gidy);
+    __global Dtype* A_off = A + offA;
+    Dtype srcA = A_off[gidy * ldA + gidx];
+    write_imagef(ImA, coord_dst, (Dtype4)srcA);
+}
+
+__kernel void TEMPLATE(gemm_buffer_copy_image_no_transpose,Dtype)(
+    __global Dtype* A,
+    __write_only image2d_t ImA,
+    int offA,
+    int width,
+    int height,
+    int ldA)
+{
+    const int gidx = get_global_id(0);
+    const int gidy = get_global_id(1);
+    int2 coord_dst = (int2)(gidx, gidy);
+    if (gidx >= width || gidy >= height) {
+      write_imageui(ImA, coord_dst, (uint4)0);
+      return;
+    }
+    __global Dtype* A_off = A + offA;
+    uint4 srcA = convert_uint4(as_uchar4(A_off[gidy * ldA + gidx]));
+    write_imageui(ImA, coord_dst, srcA);
+}
diff --git a/modules/dnn/src/opencl/math.cl b/modules/dnn/src/opencl/math.cl
new file mode 100644 (file)
index 0000000..b8f4eff
--- /dev/null
@@ -0,0 +1,55 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define CONCAT(A,B) A##_##B
+#define TEMPLATE(name,type) CONCAT(name,type)
+#define Dtype float
+
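+// BLAS-style axpy, y[i] = alpha * x[i] + y[i], written as a grid-stride loop
+// so any launch size covers all n elements.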
+__kernel void TEMPLATE(axpy,Dtype)(const int n, const Dtype alpha, __global const Dtype* x,
+                                   const int offx, __global Dtype* y,
+                                   const int offy) {
+  for (int index = get_global_id(0); index < n; index += get_global_size(0)) {
+    Dtype src = x[offx + index];
+    Dtype dst = y[offy + index];
+    y[offy + index] = alpha * src + dst;
+  }
+}
diff --git a/modules/dnn/src/opencl/matvec_mul.cl b/modules/dnn/src/opencl/matvec_mul.cl
new file mode 100644 (file)
index 0000000..0dabd62
--- /dev/null
@@ -0,0 +1,191 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define CONCAT(A,B) A##_##B
+#define TEMPLATE(name,type) CONCAT(name,type)
+#define Dtype float
+
+__kernel void TEMPLATE(matvec_mul4,Dtype)(
+          __global const float * A,
+          int offA,
+          unsigned int A_col_size,
+          unsigned int trail_item,
+          __global const float * v,
+          int offv,
+          float alpha,
+          float beta,
+          __global float4 * result,
+          int offr,
+          __local float4 * work)
+{
+  unsigned int row_gid = get_group_id(0);
+  unsigned int lid = get_local_id(0);
+  const __global float *src0_read = A + row_gid * 4 * A_col_size + offA;
+  const __global float *src1_read = v + offv;
+  result = (__global float4*)((__global float*)result + offr);
+  float4 dot0 = (float4)(0.f);
+  float4 dot1 = (float4)(0.f);
+  float4 dot2 = (float4)(0.f);
+  float4 dot3 = (float4)(0.f);
+
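+  // Each work-item strides over float4 chunks of four consecutive rows of A,
+  // accumulating four partial dot products against the same chunk of v.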
+  unsigned int i = lid;
+  while( i < A_col_size / 4) {
+    const float4 a0 = vload4(i, src0_read);
+    const float4 a1 = vload4(i, src0_read + A_col_size);
+    const float4 a2 = vload4(i, src0_read + 2 * A_col_size);
+    const float4 a3 = vload4(i, src0_read + 3 * A_col_size);
+
+    const float4 b0 = vload4(i, src1_read);
+
+    dot0 += a0 * b0;
+    dot1 += a1 * b0;
+    dot2 += a2 * b0;
+    dot3 += a3 * b0;
+
+    i += get_local_size(0);
+  }
+
+  work[lid].s0 = dot0.x + dot0.y + dot0.z + dot0.w;
+  work[lid].s1 = dot1.x + dot1.y + dot1.z + dot1.w;
+  work[lid].s2 = dot2.x + dot2.y + dot2.z + dot2.w;
+  work[lid].s3 = dot3.x + dot3.y + dot3.z + dot3.w;
+
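+  // Only the work-item whose strided index stopped exactly at A_col_size / 4
+  // picks up the scalar tail when the column count is not a multiple of 4.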
+  if(i == A_col_size / 4)
+  {
+    if(trail_item != 0)
+    {
+      const __global float *src0_trail = src0_read + i * 4;
+      const __global float *src1_trail = src1_read + i * 4;
+      for(unsigned int i = 0; i < trail_item; ++i) {
+        const float at0 = src0_trail[i];
+        const float at1 = src0_trail[i + A_col_size];
+        const float at2 = src0_trail[i + 2 * A_col_size];
+        const float at3 = src0_trail[i + 3 * A_col_size];
+
+        const float bt = src1_trail[i];
+
+        work[lid].s0 += at0 * bt;
+        work[lid].s1 += at1 * bt;
+        work[lid].s2 += at2 * bt;
+        work[lid].s3 += at3 * bt;
+      }
+    }
+
+  }
+
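+  // Tree reduction in local memory; correctness assumes get_local_size(0) is a
+  // power of two.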
+  for(unsigned int stride=get_local_size(0)/2 ; stride>0 ; stride>>=1) {
+      barrier(CLK_LOCAL_MEM_FENCE);
+      if(lid < stride)
+        work[lid] += work[lid+stride];
+  }
+  if(lid == 0) {
+    if(beta == (Dtype)0)
+      result[row_gid] = alpha * work[0];
+    else
+      result[row_gid] = alpha * work[0] + beta * result[row_gid];
+  }
+}
+
+/* This kernel handles the trailing rows when the row count of A is not a multiple of 4. */
+__kernel void TEMPLATE(matvec_mul1,Dtype)(
+          __global const float * A,
+          int offA,
+          unsigned int A_col_size,
+          unsigned int row_offset,
+          unsigned int trail_item,
+          __global const float * v,
+          int offv,
+          float alpha,
+          float beta,
+          __global float * result,
+          int offr,
+          __local float * work)
+{
+  unsigned int row_gid = get_group_id(0);
+  unsigned int lid = get_local_id(0);
+
+  const __global float *src0_read = A + (row_offset + row_gid) * A_col_size + offA;
+  const __global float *src1_read = v + offv;
+  result = result + offr;
+  float4 dot0 = (float4)(0.f);
+
+  unsigned int i = lid;
+  while( i < A_col_size / 4)
+  {
+    const float4 a0 = vload4(i, src0_read);
+    const float4 b0 = vload4(i, src1_read);
+
+    dot0 += a0 * b0;
+    i += get_local_size(0);
+  }
+
+  work[lid] = dot0.x + dot0.y + dot0.z + dot0.w;
+
+  if(i == A_col_size / 4)
+  {
+    if(trail_item != 0)
+    {
+      const __global float *src0_trail = src0_read + i * 4;
+      const __global float *src1_trail = src1_read + i * 4;
+      for(unsigned int i = 0; i < trail_item; ++i) {
+        const float at0 = src0_trail[i];
+        const float bt = src1_trail[i];
+
+        work[lid] += at0 * bt;
+      }
+    }
+
+  }
+  for(unsigned int stride=get_local_size(0)/2 ; stride>0 ; stride>>=1) {
+      barrier(CLK_LOCAL_MEM_FENCE);
+      if(lid < stride)
+        work[lid] += work[lid+stride];
+  }
+
+  if(lid == 0) {
+    if(beta == (Dtype)0) {
+      result[row_gid+row_offset] = alpha * work[0];
+    } else {
+      result[row_gid+row_offset] *= beta;
+      result[row_gid+row_offset] += alpha * work[0];
+    }
+  }
+}
diff --git a/modules/dnn/src/opencl/ocl4dnn_lrn.cl b/modules/dnn/src/opencl/ocl4dnn_lrn.cl
new file mode 100644 (file)
index 0000000..58477ce
--- /dev/null
@@ -0,0 +1,96 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define CONCAT(A,B) A##_##B
+#define TEMPLATE(name,type) CONCAT(name,type)
+#define Dtype float
+
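+// Cross-channel LRN with the scale fused into the output:
+//   out = in * (k + (alpha/size) * sum_window(in^2))^(-beta)
+// evaluated with a running sum of squares sliding along the channel axis.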
+__kernel void TEMPLATE(lrn_full_no_scale,Dtype)(const int nthreads, __global const Dtype* in,
+                             const int num, const int channels,
+                             const int height, const int width, const int size,
+                             const Dtype alpha_over_size, const Dtype k,
+                             __global Dtype* const out,
+                             const Dtype negative_beta) {
+  for (int index = get_global_id(0); index < nthreads;
+      index += get_global_size(0)) {
+    // find out the local offset
+    const int w = index % width;
+    const int h = (index / width) % height;
+    const int n = index / width / height;
+    const int offset = (n * channels * height + h) * width + w;
+    const int step = height * width;
+    __global const Dtype* in_off = in + offset;
+    __global Dtype* out_off = out + offset;
+    Dtype scale_val;
+    int head = 0;
+    const int pre_pad = (size - 1) / 2;
+    const int post_pad = size - pre_pad - 1;
+    Dtype accum_scale = 0;
+    // fill the scale at [n, :, h, w]
+    // accumulate values
+    while (head < post_pad && head < channels) {
+      accum_scale += in_off[head * step] * in_off[head * step];
+      ++head;
+    }
+    // both add and subtract
+    while (head < channels) {
+      accum_scale += in_off[head * step] * in_off[head * step];
+      if (head - size >= 0) {
+        accum_scale -= in_off[(head - size) * step]
+            * in_off[(head - size) * step];
+      }
+      scale_val = k + accum_scale * alpha_over_size;
+      out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr((float)scale_val, (float)negative_beta);
+      ++head;
+    }
+    // subtract only
+    while (head < channels + post_pad) {
+      if (head - size >= 0) {
+        accum_scale -= in_off[(head - size) * step]
+            * in_off[(head - size) * step];
+      }
+      scale_val = k + accum_scale * alpha_over_size;
+      out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr((float)scale_val, (float)negative_beta);
+      ++head;
+    }
+  }
+}
diff --git a/modules/dnn/src/opencl/ocl4dnn_pooling.cl b/modules/dnn/src/opencl/ocl4dnn_pooling.cl
new file mode 100644 (file)
index 0000000..326d5bc
--- /dev/null
@@ -0,0 +1,177 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define CONCAT(A,B) A##_##B
+#define TEMPLATE(name,type) CONCAT(name,type)
+#define Dtype float
+
+void TEMPLATE(max_pool_forward_impl, Dtype)(
+    const int nthreads, __global const Dtype* bottom_data, const int num,
+    const int channels, const int height, const int width,
+    const int pooled_height, const int pooled_width, const int kernel_h,
+    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,
+    const int pad_w,
+    __global Dtype* top_data,
+    const int use_mask, __global int* mask, __global Dtype* top_mask, bool no_mask)
+{
+  for (int index = get_global_id(0); index < nthreads;
+      index += get_global_size(0))
+  {
+    const int pw = index % pooled_width;
+    const int ph = (index / pooled_width) % pooled_height;
+    const int c = (index / pooled_width / pooled_height) % channels;
+    const int n = index / pooled_width / pooled_height / channels;
+    int hstart = ph * stride_h - pad_h;
+    int wstart = pw * stride_w - pad_w;
+    const int hend = min(hstart + kernel_h, height);
+    const int wend = min(wstart + kernel_w, width);
+    hstart = max(hstart, (int)0);
+    wstart = max(wstart, (int)0);
+    Dtype maxval = -FLT_MAX;
+    int maxidx = -1;
+    __global const Dtype* bottom_slice = bottom_data
+        + (n * channels + c) * height * width;
+    for (int h = hstart; h < hend; ++h) {
+      for (int w = wstart; w < wend; ++w) {
+        if (bottom_slice[h * width + w] > maxval) {
+          maxidx = h * width + w;
+          maxval = bottom_slice[maxidx];
+        }
+      }
+    }
+    top_data[index] = maxval;
+    if (!no_mask) {
+      if (use_mask == 1) {
+        mask[index] = maxidx;
+      } else {
+        top_mask[index] = maxidx;
+      }
+    }
+  }
+}
+
+__kernel void TEMPLATE(max_pool_forward, Dtype)(
+    const int nthreads, __global const Dtype* bottom_data, const int num,
+    const int channels, const int height, const int width,
+    const int pooled_height, const int pooled_width, const int kernel_h,
+    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,
+    const int pad_w,
+    __global Dtype* top_data,
+    const int use_mask, __global int* mask, __global Dtype* top_mask)
+{
+    TEMPLATE(max_pool_forward_impl, Dtype)(
+      nthreads, bottom_data, num, channels, height, width,
+      pooled_height, pooled_width, kernel_h,
+      kernel_w, stride_h, stride_w, pad_h, pad_w, top_data, use_mask, mask, top_mask, false
+    );
+}
+
+__kernel void TEMPLATE(ave_pool_forward, Dtype)(
+    const int nthreads, __global const Dtype* const bottom_data, const int num,
+    const int channels, const int height, const int width,
+    const int pooled_height, const int pooled_width, const int kernel_h,
+    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,
+    const int pad_w, __global Dtype* top_data)
+{
+  for (int index = get_global_id(0); index < nthreads;
+      index += get_global_size(0))
+  {
+    {
+      const int pw = index % pooled_width;
+      const int ph = (index / pooled_width) % pooled_height;
+      const int c = (index / pooled_width / pooled_height) % channels;
+      const int n = index / pooled_width / pooled_height / channels;
+      int hstart = ph * stride_h - pad_h;
+      int wstart = pw * stride_w - pad_w;
+      int hend = min(hstart + kernel_h, height + pad_h);
+      int wend = min(wstart + kernel_w, width + pad_w);
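+      // pool_size is computed before clamping to the borders, so zero-padded
+      // positions count toward the divisor (Caffe-style average pooling).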
+      const int pool_size = (hend - hstart) * (wend - wstart);
+      hstart = max(hstart, (int)0);
+      wstart = max(wstart, (int)0);
+      hend = min(hend, height);
+      wend = min(wend, width);
+      Dtype aveval = 0;
+      __global const Dtype* bottom_slice = bottom_data
+          + (n * channels + c) * height * width;
+      for (int h = hstart; h < hend; ++h) {
+        for (int w = wstart; w < wend; ++w) {
+          aveval += bottom_slice[h * width + w];
+        }
+      }
+      top_data[index] = aveval / pool_size;
+    }
+  }
+}
+
+__kernel void TEMPLATE(sto_pool_forward_test,Dtype)(
+    const int nthreads, __global const Dtype* const bottom_data, const int num,
+    const int channels, const int height, const int width,
+    const int pooled_height, const int pooled_width, const int kernel_h,
+    const int kernel_w, const int stride_h, const int stride_w,
+    __global Dtype* top_data)
+{
+  for (int index = get_global_id(0); index < nthreads;
+      index += get_global_size(0))
+  {
+    const int pw = index % pooled_width;
+    const int ph = (index / pooled_width) % pooled_height;
+    const int c = (index / pooled_width / pooled_height) % channels;
+    const int n = index / pooled_width / pooled_height / channels;
+    const int hstart = ph * stride_h;
+    const int hend = min(hstart + kernel_h, height);
+    const int wstart = pw * stride_w;
+    const int wend = min(wstart + kernel_w, width);
+    // Start cumsum at FLT_MIN (not 0) to avoid divide-by-zero problems
+    Dtype cumsum = FLT_MIN;
+    Dtype cumvalues = 0.;
+    __global const Dtype* bottom_slice = bottom_data
+        + (n * channels + c) * height * width;
+    // First pass: get sum
+    for (int h = hstart; h < hend; ++h) {
+      for (int w = wstart; w < wend; ++w) {
+        cumsum += bottom_slice[h * width + w];
+        cumvalues += bottom_slice[h * width + w] * bottom_slice[h * width + w];
+      }
+    }
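+    // Inference-time stochastic pooling: sum(x^2) / sum(x) is the expected
+    // value of sampling each (non-negative) activation with probability
+    // x / sum(x).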
+    top_data[index] = cumvalues / cumsum;
+  }
+}
index e9fcadc..54cf489 100644 (file)
@@ -70,6 +70,10 @@ __kernel void kernel_channel_div(const int count,
   if(index < count) {
     int n = index / channels / spatial_dim;
     int s = index % spatial_dim;
-    data[index] /= channel_sum[n * spatial_dim + s];
+    T v = data[index] / channel_sum[n * spatial_dim + s];
+#ifdef LOG_SOFTMAX
+    v = log(v);
+#endif
+    data[index] = v;
   }
-}
\ No newline at end of file
+}
diff --git a/modules/dnn/src/opencl/softmax_loss.cl b/modules/dnn/src/opencl/softmax_loss.cl
new file mode 100644 (file)
index 0000000..d30b32b
--- /dev/null
@@ -0,0 +1,182 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define CONCAT(A,B) A##_##B
+#define TEMPLATE(name,type) CONCAT(name,type)
+#define Dtype float
+
+#if defined(cl_intel_subgroups)
+#pragma OPENCL EXTENSION  cl_intel_subgroups : enable
+#endif
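+
+// Note: the reductions below scale values by 100000 before sub_group_reduce_*
+// and scale the result back afterwards; this looks like a precision workaround
+// inherited from the kernels these were ported from, not part of the math.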
+
+__kernel void TEMPLATE(softmax_forward_slm,Dtype)(const int num, const int channels,
+                                   const int spatial_dim,
+                                   __global Dtype* scale,
+                                   __global const Dtype* data,
+                                   __global Dtype* out,
+                                   __local Dtype *out_tmp,
+                                   __local Dtype *scale_tmp,
+                                   __local Dtype *group_tmp) {
+
+  int n = get_global_id(1);
+  for (int index = get_global_id(0), s = 0; index < spatial_dim * get_local_size(0); index +=
+      get_global_size(0), ++s) {
+    float maxval = -FLT_MAX;
+    for (int c = get_global_id(0); c < channels; c += get_global_size(0)) {
+      Dtype tmp = data[(n * channels + c) * spatial_dim + s];
+      maxval = max((Dtype)tmp, (Dtype)maxval);
+    }
+    maxval = sub_group_reduce_max(maxval * 100000);
+    //if (get_sub_group_local_id() == 0)
+    group_tmp[get_sub_group_id() * spatial_dim + s] = maxval;
+  }
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for (int index = get_global_id(0); index < spatial_dim * get_max_sub_group_size(); index +=
+      get_global_size(0)) {
+    int s = index / get_max_sub_group_size();
+    Dtype maxval = sub_group_reduce_max(group_tmp[get_sub_group_local_id() * spatial_dim + s]);
+    //if (get_sub_group_local_id() == 0)
+    scale_tmp[s] = maxval / 100000;
+  }
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for (int index = get_global_id(0); index < channels * spatial_dim;
+      index += get_global_size(0)) {
+    int s = index % spatial_dim;
+    out_tmp[index] = exp(data[n * channels * spatial_dim + index] - scale_tmp[s]);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for (int index = get_global_id(0), s = 0; index < spatial_dim * get_local_size(0); index +=
+      get_global_size(0), ++s) {
+    Dtype sum = 0;
+    for (int c = get_global_id(0); c < channels; c += get_global_size(0)) {
+      sum += out_tmp[c * spatial_dim + s];
+    }
+    sum = sub_group_reduce_add(sum * 100000);
+    group_tmp[get_sub_group_id() * spatial_dim + s] = sum;
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for (int index = get_global_id(0); index < spatial_dim * get_max_sub_group_size(); index +=
+      get_global_size(0)) {
+    int s = index / get_max_sub_group_size();
+    Dtype sum = sub_group_reduce_add(group_tmp[get_sub_group_local_id() * spatial_dim + s]);
+    //if (get_sub_group_local_id() == 0)
+    scale_tmp[s] = sum / 100000;
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for (int index = get_global_id(0); index < channels * spatial_dim;
+      index += get_global_size(0)) {
+    int s = index % spatial_dim;
+    out[n * channels * spatial_dim + index] = out_tmp[index] / scale_tmp[s];
+  }
+}
+
+__kernel void TEMPLATE(softmax_forward,Dtype)(const int num, const int channels,
+                                   const int spatial_dim,
+                                   __global Dtype* scale,
+                                   __global const Dtype* data,
+                                   __global Dtype* out) {
+
+  int n = get_global_id(1);
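+  // group_tmp reuses the tail of the caller-provided scale buffer as scratch:
+  // per-subgroup partial results live past the first num * spatial_dim entries
+  // that hold the actual scale values.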
+  __global Dtype *group_tmp = scale + spatial_dim * num + n * get_max_sub_group_size() * spatial_dim;
+  for (int index = get_global_id(0), s = 0; index < spatial_dim * get_local_size(0); index +=
+      get_global_size(0), ++s) {
+    float maxval = -FLT_MAX;
+    for (int c = get_global_id(0); c < channels; c += get_global_size(0)) {
+      Dtype tmp = data[(n * channels + c) * spatial_dim + s];
+      maxval = max((Dtype)tmp, (Dtype)maxval);
+    }
+    maxval = sub_group_reduce_max(maxval * 100000);
+    //if (get_sub_group_local_id() == 0)
+    group_tmp[get_sub_group_id() * spatial_dim + s] = maxval;
+  }
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+  for (int index = get_global_id(0); index < spatial_dim * get_max_sub_group_size(); index +=
+      get_global_size(0)) {
+    int s = index / get_max_sub_group_size();
+    Dtype maxval = sub_group_reduce_max(group_tmp[get_sub_group_local_id() * spatial_dim + s]);
+    //if (get_sub_group_local_id() == 0)
+    scale[n * spatial_dim + s] = maxval / 100000;
+  }
+
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+  for (int index = get_global_id(0); index < channels * spatial_dim;
+      index += get_global_size(0)) {
+    int s = index % spatial_dim;
+    out[n * channels * spatial_dim + index] = exp(data[n * channels * spatial_dim + index] - scale[n * spatial_dim + s]);
+  }
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+  for (int index = get_global_id(0), s = 0; index < spatial_dim * get_local_size(0); index +=
+      get_global_size(0), ++s) {
+    Dtype sum = 0;
+    for (int c = get_global_id(0); c < channels; c += get_global_size(0)) {
+      sum += out[n * channels * spatial_dim + c * spatial_dim + s];
+    }
+    sum = sub_group_reduce_add(sum * 100000);
+    group_tmp[get_sub_group_id() * spatial_dim + s] = sum;
+  }
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+  for (int index = get_global_id(0); index < spatial_dim * get_max_sub_group_size(); index +=
+      get_global_size(0)) {
+    int s = index / get_max_sub_group_size();
+    Dtype sum = sub_group_reduce_add(group_tmp[get_sub_group_local_id() * spatial_dim + s]);
+    //if (get_sub_group_local_id() == 0)
+    scale[n * spatial_dim + s] = sum / 100000;
+  }
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+  for (int index = get_global_id(0); index < channels * spatial_dim;
+      index += get_global_size(0)) {
+    int s = index % spatial_dim;
+    out[n * channels * spatial_dim + index] /= scale[n * spatial_dim + s];
+  }
+}
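
For reference, both kernels above implement the standard numerically stable softmax across channels, evaluated independently at each spatial position: subtract the per-position channel maximum, exponentiate, then normalize by the sum. (The extra multiply/divide by 100000 around the sub-group reductions appears to be a device-specific workaround and does not change the math.) A minimal CPU sketch of the same computation, as a hypothetical reference that is not part of this patch:

#include <algorithm>
#include <cfloat>
#include <cmath>

// Softmax across `channels` at each spatial position `s` of each sample `n`,
// with max subtraction for numerical stability (matches the kernels above).
static void softmaxRef(const float* data, float* out,
                       int num, int channels, int spatialDim)
{
    for (int n = 0; n < num; ++n)
        for (int s = 0; s < spatialDim; ++s)
        {
            const float* in = data + n * channels * spatialDim + s;
            float* dst = out + n * channels * spatialDim + s;
            float maxval = -FLT_MAX;                    // pass 1: channel max
            for (int c = 0; c < channels; ++c)
                maxval = std::max(maxval, in[c * spatialDim]);
            float sum = 0.f;                            // pass 2: exp and sum
            for (int c = 0; c < channels; ++c)
            {
                dst[c * spatialDim] = std::exp(in[c * spatialDim] - maxval);
                sum += dst[c * spatialDim];
            }
            for (int c = 0; c < channels; ++c)          // pass 3: normalize
                dst[c * spatialDim] /= sum;
        }
}
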
index 9383a08..e860598 100644
@@ -40,6 +40,8 @@
 //M*/
 
 #include <opencv2/core.hpp>
+#include <opencv2/core/ocl.hpp>
+#include <opencv2/core/opencl/ocl_defs.hpp>
 #include <opencv2/core/utils/trace.hpp>
 #include <opencv2/core/softfloat.hpp> // int32_t (MSVS 2010-2013)
 #include "cvconfig.h"
index 67565cc..f2e83c0 100644
@@ -321,10 +321,10 @@ DictValue parseDims(const tensorflow::TensorProto &tensor) {
     CV_Assert(tensor.dtype() == tensorflow::DT_INT32);
     CV_Assert(dims == 1);
 
-    int size = tensor.tensor_content().size() / sizeof(int);
-    const int *data = reinterpret_cast<const int*>(tensor.tensor_content().c_str());
+    Mat values = getTensorContent(tensor);
+    CV_Assert(values.type() == CV_32SC1);
     // TODO: add reordering shape if dims == 4
-    return DictValue::arrayInt(data, size);
+    return DictValue::arrayInt((int*)values.data, values.total());
 }
 
 void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
@@ -448,7 +448,7 @@ void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int in
 
 class TFImporter : public Importer {
 public:
-    TFImporter(const char *model);
+    TFImporter(const char *model, const char *config = NULL);
     void populateNet(Net dstNet);
     ~TFImporter() {}
 
@@ -463,13 +463,20 @@ private:
                                                 int input_blob_index = -1, int* actual_inp_blob_idx = 0);
 
 
-    tensorflow::GraphDef net;
+    // Binary serialized TensorFlow graph includes weights.
+    tensorflow::GraphDef netBin;
+    // Optional text definition of TensorFlow graph. It is more flexible than the
+    // binary format and may be used to build the network with the binary graph
+    // serving only as weights storage. This approach is similar to Caffe's
+    // `.prototxt` and `.caffemodel` pair.
+    tensorflow::GraphDef netTxt;
 };
 
-TFImporter::TFImporter(const char *model)
+TFImporter::TFImporter(const char *model, const char *config)
 {
     if (model && model[0])
-        ReadTFNetParamsFromBinaryFileOrDie(model, &net);
+        ReadTFNetParamsFromBinaryFileOrDie(model, &netBin);
+    if (config && config[0])
+        ReadTFNetParamsFromTextFileOrDie(config, &netTxt);
 }
 
 void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
@@ -557,21 +564,23 @@ const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDe
         *actual_inp_blob_idx = input_blob_index;
     }
 
-    return net.node(const_layers.at(kernel_inp.name)).attr().at("value").tensor();
+    int nodeIdx = const_layers.at(kernel_inp.name);
+    if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name)
+    {
+        return netBin.node(nodeIdx).attr().at("value").tensor();
+    }
+    else
+    {
+        CV_Assert(nodeIdx < netTxt.node_size(),
+                  netTxt.node(nodeIdx).name() == kernel_inp.name);
+        return netTxt.node(nodeIdx).attr().at("value").tensor();
+    }
 }
 
-
-void TFImporter::populateNet(Net dstNet)
+static void addConstNodes(const tensorflow::GraphDef& net, std::map<String, int>& const_layers,
+                          std::set<String>& layers_to_ignore)
 {
-    RemoveIdentityOps(net);
-
-    std::map<int, String> layers_to_ignore;
-
-    int layersSize = net.node_size();
-
-    // find all Const layers for params
-    std::map<String, int> value_id;
-    for (int li = 0; li < layersSize; li++)
+    for (int li = 0; li < net.node_size(); li++)
     {
         const tensorflow::NodeDef &layer = net.node(li);
         String name = layer.name();
@@ -582,11 +591,27 @@ void TFImporter::populateNet(Net dstNet)
 
         if (layer.attr().find("value") != layer.attr().end())
         {
-            value_id.insert(std::make_pair(name, li));
+            CV_Assert(const_layers.insert(std::make_pair(name, li)).second);
         }
-
-        layers_to_ignore[li] = name;
+        layers_to_ignore.insert(name);
     }
+}
+
+void TFImporter::populateNet(Net dstNet)
+{
+    RemoveIdentityOps(netBin);
+    RemoveIdentityOps(netTxt);
+
+    std::set<String> layers_to_ignore;
+
+    tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin;
+
+    int layersSize = net.node_size();
+
+    // find all Const layers for params
+    std::map<String, int> value_id;
+    addConstNodes(netBin, value_id, layers_to_ignore);
+    addConstNodes(netTxt, value_id, layers_to_ignore);
 
     std::map<String, int> layer_id;
 
@@ -597,7 +622,7 @@ void TFImporter::populateNet(Net dstNet)
         String type = layer.op();
         LayerParams layerParams;
 
-        if(layers_to_ignore.find(li) != layers_to_ignore.end())
+        if(layers_to_ignore.find(name) != layers_to_ignore.end())
             continue;
 
         if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative")
@@ -627,7 +652,7 @@ void TFImporter::populateNet(Net dstNet)
                 StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
                 CV_Assert(next_layers.size() == 1);
                 layer = net.node(next_layers[0].second);
-                layers_to_ignore[next_layers[0].second] = next_layers[0].first;
+                layers_to_ignore.insert(next_layers[0].first);
                 name = layer.name();
                 type = layer.op();
             }
@@ -644,7 +669,7 @@ void TFImporter::populateNet(Net dstNet)
 
                 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
                 ExcludeLayer(net, weights_layer_index, 0, false);
-                layers_to_ignore[weights_layer_index] = next_layers[0].first;
+                layers_to_ignore.insert(next_layers[0].first);
             }
 
             kernelFromTensor(getConstBlob(layer, value_id), layerParams.blobs[0]);
@@ -684,7 +709,7 @@ void TFImporter::populateNet(Net dstNet)
                 layerParams.set("pad_mode", "");  // We use padding values.
                 CV_Assert(next_layers.size() == 1);
                 ExcludeLayer(net, next_layers[0].second, 0, false);
-                layers_to_ignore[next_layers[0].second] = next_layers[0].first;
+                layers_to_ignore.insert(next_layers[0].first);
             }
 
             int id = dstNet.addLayer(name, "Convolution", layerParams);
@@ -748,7 +773,7 @@ void TFImporter::populateNet(Net dstNet)
                 int weights_layer_index = next_layers[0].second;
                 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
                 ExcludeLayer(net, weights_layer_index, 0, false);
-                layers_to_ignore[weights_layer_index] = next_layers[0].first;
+                layers_to_ignore.insert(next_layers[0].first);
             }
 
             int kernel_blob_index = -1;
@@ -778,6 +803,30 @@ void TFImporter::populateNet(Net dstNet)
             // one input only
             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
         }
+        else if (type == "Flatten")
+        {
+            int id = dstNet.addLayer(name, "Flatten", layerParams);
+            layer_id[name] = id;
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+        }
+        else if (type == "Transpose")
+        {
+            Mat perm = getTensorContent(getConstBlob(layer, value_id, 1));
+            CV_Assert(perm.type() == CV_32SC1);
+            int* permData = (int*)perm.data;
+            if (perm.total() == 4)
+            {
+                for (int i = 0; i < 4; ++i)
+                    permData[i] = toNCHW[permData[i]];
+            }
+            layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
+
+            int id = dstNet.addLayer(name, "Permute", layerParams);
+            layer_id[name] = id;
+
+            // one input only
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+        }
         else if (type == "Const")
         {
         }
@@ -807,7 +856,7 @@ void TFImporter::populateNet(Net dstNet)
         {
             int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
             int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
-            layerParams.set("axis", toNCHW[axis]);
+            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW[axis] : axis);
 
             int id = dstNet.addLayer(name, "Concat", layerParams);
             layer_id[name] = id;
@@ -866,8 +915,6 @@ void TFImporter::populateNet(Net dstNet)
             CV_Assert(layer.input_size() == 2);
             // num_split
             // 1st blob is dims tensor
-            layerParams.set("slice_point", DictValue::arrayReal((double*)0, 0));
-
             int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
             layerParams.set("axis", toNCHW[axis]);
 
@@ -877,6 +924,34 @@ void TFImporter::populateNet(Net dstNet)
             // one input only
             connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
         }
+        else if (type == "Slice")
+        {
+            // op: "Slice"
+            // input: "input_node"
+            // input: "Slice/begin"
+            // input: "Slice/size"
+            CV_Assert(layer.input_size() == 3);
+
+            const tensorflow::TensorProto begins = getConstBlob(layer, value_id, 1);
+            const tensorflow::TensorProto sizes = getConstBlob(layer, value_id, 2);
+            std::string beginsData = begins.tensor_content();
+            std::string sizesData = sizes.tensor_content();
+            CV_Assert(begins.dtype() == tensorflow::DT_INT32);
+            CV_Assert(sizes.dtype() == tensorflow::DT_INT32);
+            CV_Assert(!beginsData.empty());
+            CV_Assert(!sizesData.empty());
+            CV_Assert(beginsData.size() == sizesData.size());
+
+            layerParams.set("begin", DictValue::arrayInt((int*)beginsData.c_str(),
+                                                         beginsData.size() / 4));
+            layerParams.set("size", DictValue::arrayInt((int*)sizesData.c_str(),
+                                                        sizesData.size() / 4));
+
+            int id = dstNet.addLayer(name, "Slice", layerParams);
+            layer_id[name] = id;
+
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+        }
         else if (type == "Mul")
         {
             bool haveConst = false;
@@ -903,6 +978,19 @@ void TFImporter::populateNet(Net dstNet)
                 else  // is a vector
                 {
                     layerParams.blobs.resize(1, scaleMat);
+
+                    StrIntVector next_layers = getNextLayers(net, name, "Add");
+                    if (!next_layers.empty())
+                    {
+                        layerParams.set("bias_term", true);
+                        layerParams.blobs.resize(2);
+
+                        int weights_layer_index = next_layers[0].second;
+                        blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back());
+                        ExcludeLayer(net, weights_layer_index, 0, false);
+                        layers_to_ignore.insert(next_layers[0].first);
+                    }
+
                     id = dstNet.addLayer(name, "Scale", layerParams);
                 }
                 layer_id[name] = id;
@@ -931,51 +1019,28 @@ void TFImporter::populateNet(Net dstNet)
         }
         else if (type == "Pad")
         {
-            tensorflow::TensorProto paddings = getConstBlob(layer, value_id, 1);
-            MatShape shape;
-            blobShapeFromTensor(paddings, shape);
-            if (shape[0] != 4)
-                CV_Error(Error::StsError, "Expected NHWC data format");
-
-            // Copy tensor with paddings.
-            std::vector<int32_t> values(shape[0] * 2);
-            CV_Assert(sizeof(int32_t) * values.size() ==
-                      paddings.tensor_content().size());
-            memcpy(&values[0], &paddings.tensor_content()[0],
-                   paddings.tensor_content().size());
-
-            // Allow only one padding operation per layer.
-            bool padded = false;
-            for (int i = 0; i < values.size(); ++i)
+            Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
+            CV_Assert(paddings.type() == CV_32SC1);
+            if (paddings.total() == 8)
             {
-                if (values[i])
-                {
-                    if (padded)
-                        CV_Error(Error::StsError,
-                                 "Only single padding operation per layer is supported");
-                    padded = true;
-
-                    int axis = i / 2;
-                    // Remap NHWC to NCHW.
-                    // 0 -> 0
-                    // 1 -> 2
-                    // 2 -> 3
-                    // 3 -> 1
-                    if (axis != 0)
-                        axis = axis % 3 + 1;
-
-                    layerParams.set("padding_dim", axis);
-                    if (i % 2)  // Pad after
-                        layerParams.set("padding", values[i]);
-                    else  // Pad before
-                        layerParams.set("padding", -1 * values[i]);
-
-                    int id = dstNet.addLayer(name, "Padding", layerParams);
-                    layer_id[name] = id;
-
-                    connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
-                }
+                // Perhaps we have NHWC padding dimensions order.
+                //  N    H    W    C
+                // 0 1  2 3  4 5  6 7
+                std::swap(*paddings.ptr<int32_t>(0, 2), *paddings.ptr<int32_t>(0, 6));
+                std::swap(*paddings.ptr<int32_t>(0, 3), *paddings.ptr<int32_t>(0, 7));
+                //  N    C    W    H
+                // 0 1  2 3  4 5  6 7
+                std::swap(*paddings.ptr<int32_t>(0, 4), *paddings.ptr<int32_t>(0, 6));
+                std::swap(*paddings.ptr<int32_t>(0, 5), *paddings.ptr<int32_t>(0, 7));
+                //  N    C    H    W
+                // 0 1  2 3  4 5  6 7
             }
+            layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
+
+            int id = dstNet.addLayer(name, "Padding", layerParams);
+            layer_id[name] = id;
+
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
         }
         else if (type == "FusedBatchNorm")
         {
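
The swap sequence in the Pad branch above reorders a flattened NHWC paddings tensor into NCHW; each dimension contributes a (before, after) pair. A worked example with made-up values:

#include <utility>

int main()
{
    // Flattened NHWC paddings, one (before, after) pair per dimension:
    //  N0 N1  H0 H1  W0 W1  C0 C1
    int paddings[8] = {0, 0, 1, 1, 2, 2, 3, 3};
    std::swap(paddings[2], paddings[6]);  // move C in front:
    std::swap(paddings[3], paddings[7]);  //  N N  C C  W W  H H
    std::swap(paddings[4], paddings[6]);  // swap W and H:
    std::swap(paddings[5], paddings[7]);  //  N N  C C  H H  W W  (NCHW)
    // paddings == {0, 0, 3, 3, 1, 1, 2, 2}
    return 0;
}
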
@@ -1034,17 +1099,15 @@ void TFImporter::populateNet(Net dstNet)
 
                 blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
                 ExcludeLayer(net, weights_layer_index, 0, false);
-                layers_to_ignore[weights_layer_index] = next_layers[0].first;
+                layers_to_ignore.insert(next_layers[0].first);
             }
 
             kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
-            // Swap just numbers of input and output channels.
-            std::swap(layerParams.blobs[0].size[0], layerParams.blobs[0].size[1]);
 
             const int* kshape = layerParams.blobs[0].size.p;
             layerParams.set("kernel_h", kshape[2]);
             layerParams.set("kernel_w", kshape[3]);
-            layerParams.set("num_output", kshape[0]);
+            layerParams.set("num_output", kshape[1]);
 
             setStrides(layerParams, layer);
             setPadding(layerParams, layer);
@@ -1055,6 +1118,163 @@ void TFImporter::populateNet(Net dstNet)
             // one input only
             connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
         }
+        else if (type == "BlockLSTM")
+        {
+            // op: "BlockLSTM"
+            // input: "lstm_block_wrapper/ToInt64/x"  (ignore, number of time stamps)
+            // input: "input"
+            // input: "lstm_block_wrapper/zeros"      (ignore)
+            // input: "lstm_block_wrapper/zeros"      (ignore)
+            // input: "lstm_block_wrapper/kernel"
+            // input: "lstm_block_wrapper/w_i_diag"
+            // input: "lstm_block_wrapper/w_f_diag"
+            // input: "lstm_block_wrapper/w_o_diag"
+            // input: "lstm_block_wrapper/bias"
+            if (layer.input_size() != 9)
+                CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes");
+
+            if (hasLayerAttr(layer, "forget_bias"))
+                layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f());
+
+            if (hasLayerAttr(layer, "forget_bias"))
+            {
+                float cellClip = getLayerAttr(layer, "cell_clip").f();
+                // Cell clip disabled if it's negative.
+                if (cellClip >= 0)
+                {
+                    layerParams.set("use_cell_clip", true);
+                    layerParams.set("cell_clip", cellClip);
+                }
+            }
+
+            Mat W, Wh, Wx, b;
+            blobFromTensor(getConstBlob(layer, value_id, 4), W);
+            blobFromTensor(getConstBlob(layer, value_id, 8), b);
+            const int outSize = W.cols / 4;
+
+            // IGFO->IFOG
+            float* weightData = (float*)W.data;
+            for (int i = 0; i < W.rows; ++i)
+                for (int j = 0; j < outSize; ++j)
+                {
+                    std::swap(weightData[i * W.cols + 1 * outSize + j],
+                              weightData[i * W.cols + 2 * outSize + j]);
+                    std::swap(weightData[i * W.cols + 2 * outSize + j],
+                              weightData[i * W.cols + 3 * outSize + j]);
+                }
+            Wx = W.rowRange(0, W.rows - outSize).t();
+            Wh = W.rowRange(W.rows - outSize, W.rows).t();
+
+            layerParams.blobs.resize(3);
+            layerParams.blobs[0] = Wh;
+            layerParams.blobs[1] = Wx;
+            layerParams.blobs[2] = b;
+
+            if (hasLayerAttr(layer, "use_peephole"))
+            {
+                bool usePeephole = getLayerAttr(layer, "use_peephole").b();
+                if (usePeephole)
+                {
+                    layerParams.set("use_peephole", true);
+                    layerParams.blobs.resize(6);
+                    for (int i = 0; i < 3; ++i)
+                    {
+                        Mat w;
+                        blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
+                        w = w.reshape(1, w.total());  // Single column.
+                        w = Mat::diag(w);  // Make a diagonal matrix.
+                        layerParams.blobs[3 + i] = w;
+                    }
+                }
+            }
+
+            int id = dstNet.addLayer(name, "LSTM", layerParams);
+            layer_id[name] = id;
+
+            // one input only
+            connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
+        }
+        else if (type == "ResizeNearestNeighbor")
+        {
+            Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1));
+            CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2);
+
+            layerParams.set("height", outSize.at<int>(0, 0));
+            layerParams.set("width", outSize.at<int>(0, 1));
+
+            if (hasLayerAttr(layer, "align_corners"))
+                layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b());
+
+            int id = dstNet.addLayer(name, "ResizeNearestNeighbor", layerParams);
+            layer_id[name] = id;
+
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+        }
+        else if (type == "PriorBox")
+        {
+            if (hasLayerAttr(layer, "min_size"))
+                layerParams.set("min_size", getLayerAttr(layer, "min_size").i());
+            if (hasLayerAttr(layer, "max_size"))
+                layerParams.set("max_size", getLayerAttr(layer, "max_size").i());
+            if (hasLayerAttr(layer, "flip"))
+                layerParams.set("flip", getLayerAttr(layer, "flip").b());
+            if (hasLayerAttr(layer, "clip"))
+                layerParams.set("clip", getLayerAttr(layer, "clip").b());
+            if (hasLayerAttr(layer, "offset"))
+                layerParams.set("offset", getLayerAttr(layer, "offset").f());
+            if (hasLayerAttr(layer, "variance"))
+            {
+                Mat variance = getTensorContent(getLayerAttr(layer, "variance").tensor());
+                layerParams.set("variance",
+                                DictValue::arrayReal<float*>((float*)variance.data, variance.total()));
+            }
+            if (hasLayerAttr(layer, "aspect_ratio"))
+            {
+                Mat aspectRatios = getTensorContent(getLayerAttr(layer, "aspect_ratio").tensor());
+                layerParams.set("aspect_ratio",
+                               DictValue::arrayReal<float*>((float*)aspectRatios.data, aspectRatios.total()));
+            }
+            if (hasLayerAttr(layer, "scales"))
+            {
+                Mat scales = getTensorContent(getLayerAttr(layer, "scales").tensor());
+                layerParams.set("scales",
+                               DictValue::arrayReal<float*>((float*)scales.data, scales.total()));
+            }
+            int id = dstNet.addLayer(name, "PriorBox", layerParams);
+            layer_id[name] = id;
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+            connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
+        }
+        else if (type == "DetectionOutput")
+        {
+            // op: "DetectionOutput"
+            // input_0: "locations"
+            // input_1: "classifications"
+            // input_2: "prior_boxes"
+            if (hasLayerAttr(layer, "num_classes"))
+                layerParams.set("num_classes", getLayerAttr(layer, "num_classes").i());
+            if (hasLayerAttr(layer, "share_location"))
+                layerParams.set("share_location", getLayerAttr(layer, "share_location").b());
+            if (hasLayerAttr(layer, "background_label_id"))
+                layerParams.set("background_label_id", getLayerAttr(layer, "background_label_id").i());
+            if (hasLayerAttr(layer, "nms_threshold"))
+                layerParams.set("nms_threshold", getLayerAttr(layer, "nms_threshold").f());
+            if (hasLayerAttr(layer, "top_k"))
+                layerParams.set("top_k", getLayerAttr(layer, "top_k").i());
+            if (hasLayerAttr(layer, "code_type"))
+                layerParams.set("code_type", getLayerAttr(layer, "code_type").s());
+            if (hasLayerAttr(layer, "keep_top_k"))
+                layerParams.set("keep_top_k", getLayerAttr(layer, "keep_top_k").i());
+            if (hasLayerAttr(layer, "confidence_threshold"))
+                layerParams.set("confidence_threshold", getLayerAttr(layer, "confidence_threshold").f());
+            if (hasLayerAttr(layer, "loc_pred_transposed"))
+                layerParams.set("loc_pred_transposed", getLayerAttr(layer, "loc_pred_transposed").b());
+
+            int id = dstNet.addLayer(name, "DetectionOutput", layerParams);
+            layer_id[name] = id;
+            for (int i = 0; i < 3; ++i)
+                connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
+        }
         else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
                  type == "Relu" || type == "Elu" || type == "Softmax" ||
                  type == "Identity" || type == "Relu6")
@@ -1095,9 +1315,9 @@ Ptr<Importer> createTensorflowImporter(const String&)
 
 #endif //HAVE_PROTOBUF
 
-Net readNetFromTensorflow(const String &model)
+Net readNetFromTensorflow(const String &model, const String &config)
 {
-    TFImporter importer(model.c_str());
+    TFImporter importer(model.c_str(), config.c_str());
     Net net;
     importer.populateNet(net);
     return net;
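
With the new optional second argument, a text .pbtxt graph definition can drive the import while the binary .pb supplies the weights, mirroring the Caffe prototxt/caffemodel split noted in the importer comments. A minimal usage sketch (file names are placeholders):

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    using namespace cv;
    using namespace cv::dnn;
    // Hypothetical files: binary graph with weights, optional text structure.
    Net net = readNetFromTensorflow("model.pb", "model.pbtxt");
    Mat img = imread("input.png");
    // scalefactor=1, resize to 300x300, zero mean, swapRB=true (BGR -> RGB).
    net.setInput(blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true));
    Mat out = net.forward();
    return 0;
}
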
index d96d006..694ddd6 100644
@@ -52,12 +52,27 @@ bool ReadProtoFromBinaryFileTF(const char* filename, Message* proto) {
     return success;
 }
 
+bool ReadProtoFromTextFileTF(const char* filename, Message* proto) {
+    std::ifstream fs(filename, std::ifstream::in);
+    CHECK(fs.is_open()) << "Can't open \"" << filename << "\"";
+    IstreamInputStream input(&fs);
+    bool success = google::protobuf::TextFormat::Parse(&input, proto);
+    fs.close();
+    return success;
+}
+
 void ReadTFNetParamsFromBinaryFileOrDie(const char* param_file,
                                       tensorflow::GraphDef* param) {
   CHECK(ReadProtoFromBinaryFileTF(param_file, param))
       << "Failed to parse GraphDef file: " << param_file;
 }
 
+void ReadTFNetParamsFromTextFileOrDie(const char* param_file,
+                                      tensorflow::GraphDef* param) {
+  CHECK(ReadProtoFromTextFileTF(param_file, param))
+      << "Failed to parse GraphDef file: " << param_file;
+}
+
 }
 }
 #endif
index a3abd1d..151d5f5 100644
@@ -22,6 +22,9 @@ namespace dnn {
 void ReadTFNetParamsFromBinaryFileOrDie(const char* param_file,
                                       tensorflow::GraphDef* param);
 
+void ReadTFNetParamsFromTextFileOrDie(const char* param_file,
+                                      tensorflow::GraphDef* param);
+
 }
 }
 
index 56c55d6..cc120be 100644
@@ -714,23 +714,25 @@ struct TorchImporter : public ::cv::dnn::Importer
                 readTorchTable(scalarParams, tensorParams);
                 newModule->apiType = "Padding";
 
-                CV_Assert(scalarParams.has("pad") &&
-                          scalarParams.has("dim"));
+                CV_Assert(scalarParams.has("pad") && scalarParams.has("dim"));
+                if (scalarParams.has("index") && scalarParams.get<int>("index") != 1)
+                    CV_Error(Error::StsNotImplemented, "Padding with offset is not implemented");
 
-                layerParams.set("padding_dim",
-                                static_cast<int>(scalarParams.get<double>("dim") - 1));
-                layerParams.set("padding", static_cast<int>(scalarParams.get<double>("pad")));
+                if (scalarParams.has("value"))
+                    layerParams.set("value", scalarParams.get<float>("value"));
 
                 if (scalarParams.has("nInputDim"))
-                    layerParams.set("input_dims",
-                                    static_cast<int>(scalarParams.get<double>("nInputDim")));
+                    layerParams.set("input_dims", scalarParams.get<int>("nInputDim"));
 
-                if (scalarParams.has("value"))
-                    layerParams.set("value", scalarParams.get<double>("value"));
+                int dim = scalarParams.get<int>("dim") - 1;  // In Lua we start from 1.
+                int pad = scalarParams.get<int>("pad");
 
-                if (scalarParams.has("index"))
-                    layerParams.set("index",
-                                    static_cast<int>(scalarParams.get<double>("index") - 1));
+                std::vector<int> paddings((dim + 1) * 2, 0);
+                if (pad > 0)
+                    paddings[dim * 2 + 1] = pad;  // Pad after (right).
+                else
+                    paddings[dim * 2] = -pad;  // Pad before (left).
+                layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
 
                 curModule->modules.push_back(newModule);
             }
@@ -796,10 +798,7 @@ struct TorchImporter : public ::cv::dnn::Importer
                 layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));
                 layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
 
-                Mat weights = tensorParams["weight"].second;
-                CV_Assert(weights.dims == 4);
-                int reorderedShape[] = { weights.size[1], weights.size[0], weights.size[2], weights.size[3] };
-                layerParams.blobs.push_back(weights.reshape(1, 4, reorderedShape));
+                layerParams.blobs.push_back(tensorParams["weight"].second);
 
                 bool bias = tensorParams.count("bias");
                 layerParams.set("bias_term", bias);
@@ -867,6 +866,31 @@ struct TorchImporter : public ::cv::dnn::Importer
                 layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
                 curModule->modules.push_back(newModule);
             }
+            else if (nnName == "SpatialZeroPadding")
+            {
+                readTorchTable(scalarParams, tensorParams);
+                CV_Assert(scalarParams.has("pad_l"), scalarParams.has("pad_r"),
+                          scalarParams.has("pad_t"), scalarParams.has("pad_b"));
+                int padTop = scalarParams.get<int>("pad_t");
+                int padLeft = scalarParams.get<int>("pad_l");
+                int padRight = scalarParams.get<int>("pad_r");
+                int padBottom = scalarParams.get<int>("pad_b");
+                if (padTop < 0 || padLeft < 0 || padRight < 0 || padBottom < 0)
+                    CV_Error(Error::StsNotImplemented, "SpatialZeroPadding in cropping mode is not implemented");
+
+                newModule->apiType = "Padding";
+
+                // Torch's SpatialZeroPadding works with 3- or 4-dimensional input.
+                // So we set input_dims=3 to skip the batch dimension if it is present.
+                std::vector<int> paddings(6, 0);  // CHW
+                paddings[2] = padTop;
+                paddings[3] = padBottom;
+                paddings[4] = padLeft;
+                paddings[5] = padRight;
+                layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
+                layerParams.set("input_dims", 3);
+                curModule->modules.push_back(newModule);
+            }
             else
             {
                 CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\"");
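
Both Padding conversions above encode pads as flattened per-dimension (before, after) pairs. For the SpatialZeroPadding branch, a concrete CHW instance (illustration only):

// SpatialZeroPadding(pad_l=3, pad_r=4, pad_t=1, pad_b=2) on CHW input:
//   index:    0   1    2    3    4   5
//   meaning:  C0  C1   H0   H1   W0  W1
int paddings[6] = {0, 0, /*top*/1, /*bottom*/2, /*left*/3, /*right*/4};
// Combined with input_dims=3 so that a leading batch dimension, if any,
// is skipped by the Padding layer.
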
index b0741f6..cc6f675 100644
@@ -80,12 +80,7 @@ TEST(Reproducibility_AlexNet, Accuracy)
     Mat sample = imread(_tf("grace_hopper_227.png"));
     ASSERT_TRUE(!sample.empty());
 
-    Size inputSize(227, 227);
-
-    if (sample.size() != inputSize)
-        resize(sample, sample, inputSize);
-
-    net.setInput(blobFromImage(sample), "data");
+    net.setInput(blobFromImage(sample, 1.0f, Size(227, 227), Scalar(), false), "data");
     Mat out = net.forward("prob");
     Mat ref = blobFromNPY(_tf("caffe_alexnet_prob.npy"));
     normAssert(ref, out);
@@ -105,17 +100,17 @@ TEST(Reproducibility_FCN, Accuracy)
     Mat sample = imread(_tf("street.png"));
     ASSERT_TRUE(!sample.empty());
 
-    Size inputSize(500, 500);
-    if (sample.size() != inputSize)
-        resize(sample, sample, inputSize);
-
     std::vector<int> layerIds;
     std::vector<size_t> weights, blobs;
     net.getMemoryConsumption(shape(1,3,227,227), layerIds, weights, blobs);
 
-    net.setInput(blobFromImage(sample), "data");
+    net.setInput(blobFromImage(sample, 1.0f, Size(500, 500), Scalar(), false), "data");
     Mat out = net.forward("score");
-    Mat ref = blobFromNPY(_tf("caffe_fcn8s_prob.npy"));
+
+    Mat refData = imread(_tf("caffe_fcn8s_prob.png"), IMREAD_ANYDEPTH);
+    int shape[] = {1, 21, 500, 500};
+    Mat ref(4, shape, CV_32FC1, refData.data);
+
     normAssert(ref, out);
 }
 #endif
@@ -136,10 +131,7 @@ TEST(Reproducibility_SSD, Accuracy)
     if (sample.channels() == 4)
         cvtColor(sample, sample, COLOR_BGRA2BGR);
 
-    sample.convertTo(sample, CV_32F);
-    resize(sample, sample, Size(300, 300));
-
-    Mat in_blob = blobFromImage(sample);
+    Mat in_blob = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
     net.setInput(in_blob, "data");
     Mat out = net.forward("detection_out");
 
@@ -152,7 +144,7 @@ TEST(Reproducibility_ResNet50, Accuracy)
     Net net = readNetFromCaffe(findDataFile("dnn/ResNet-50-deploy.prototxt", false),
                                findDataFile("dnn/ResNet-50-model.caffemodel", false));
 
-    Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1, Size(224,224));
+    Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(224,224), Scalar(), false);
     ASSERT_TRUE(!input.empty());
 
     net.setInput(input);
@@ -167,7 +159,7 @@ TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
     Net net = readNetFromCaffe(findDataFile("dnn/squeezenet_v1.1.prototxt", false),
                                findDataFile("dnn/squeezenet_v1.1.caffemodel", false));
 
-    Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1, Size(227,227));
+    Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false);
     ASSERT_TRUE(!input.empty());
 
     net.setInput(input);
@@ -180,7 +172,7 @@ TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
 TEST(Reproducibility_AlexNet_fp16, Accuracy)
 {
     const float l1 = 1e-5;
-    const float lInf = 2e-4;
+    const float lInf = 3e-3;
 
     const string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
     const string model = findDataFile("dnn/bvlc_alexnet.caffemodel", false);
@@ -190,7 +182,7 @@ TEST(Reproducibility_AlexNet_fp16, Accuracy)
 
     Mat sample = imread(findDataFile("dnn/grace_hopper_227.png", false));
 
-    net.setInput(blobFromImage(sample, 1, Size(227, 227)));
+    net.setInput(blobFromImage(sample, 1.0f, Size(227, 227), Scalar(), false));
     Mat out = net.forward();
     Mat ref = blobFromNPY(findDataFile("dnn/caffe_alexnet_prob.npy", false));
     normAssert(ref, out, "", l1, lInf);
@@ -212,11 +204,34 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy)
     inpMats.push_back( imread(_tf("googlenet_1.png")) );
     ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty());
 
-    net.setInput(blobFromImages(inpMats), "data");
+    net.setInput(blobFromImages(inpMats, 1.0f, Size(), Scalar(), false), "data");
     Mat out = net.forward("prob");
 
     Mat ref = blobFromNPY(_tf("googlenet_prob.npy"));
     normAssert(out, ref, "", l1, lInf);
 }
 
+// https://github.com/richzhang/colorization
+TEST(Reproducibility_Colorization, Accuracy)
+{
+    const float l1 = 1e-5;
+    const float lInf = 3e-3;
+
+    Mat inp = blobFromNPY(_tf("colorization_inp.npy"));
+    Mat ref = blobFromNPY(_tf("colorization_out.npy"));
+    Mat kernel = blobFromNPY(_tf("colorization_pts_in_hull.npy"));
+
+    const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false);
+    const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false);
+    Net net = readNetFromCaffe(proto, model);
+
+    net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel);
+    net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606));
+
+    net.setInput(inp);
+    Mat out = net.forward();
+
+    normAssert(out, ref, "", l1, lInf);
+}
+
 }
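
Throughout this file the tests now spell out the full blobFromImage argument list instead of relying on defaults. The trailing false is swapRB, keeping OpenCV's native BGR channel order, presumably to match how the stored Caffe reference blobs were generated. The pattern, as a sketch:

#include <opencv2/dnn.hpp>

// blobFromImage(image, scalefactor, size, mean, swapRB):
// swapRB=false keeps BGR order; Size() would keep the input resolution.
static cv::Mat makeBlob(const cv::Mat& img)
{
    return cv::dnn::blobFromImage(img, 1.0f, cv::Size(227, 227),
                                  cv::Scalar(), /*swapRB=*/false);
}
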
index 2486916..1bd3e51 100644
@@ -66,7 +66,27 @@ TEST(Reproducibility_GoogLeNet, Accuracy)
     inpMats.push_back( imread(_tf("googlenet_1.png")) );
     ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty());
 
-    net.setInput(blobFromImages(inpMats), "data");
+    net.setInput(blobFromImages(inpMats, 1.0f, Size(), Scalar(), false), "data");
+    Mat out = net.forward("prob");
+
+    Mat ref = blobFromNPY(_tf("googlenet_prob.npy"));
+    normAssert(out, ref);
+}
+
+OCL_TEST(Reproducibility_GoogLeNet, Accuracy)
+{
+    Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false),
+                               findDataFile("dnn/bvlc_googlenet.caffemodel", false));
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    std::vector<Mat> inpMats;
+    inpMats.push_back( imread(_tf("googlenet_0.png")) );
+    inpMats.push_back( imread(_tf("googlenet_1.png")) );
+    ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty());
+
+    net.setInput(blobFromImages(inpMats, 1.0f, Size(), Scalar(), false), "data");
     Mat out = net.forward("prob");
 
     Mat ref = blobFromNPY(_tf("googlenet_prob.npy"));
@@ -84,7 +104,36 @@ TEST(IntermediateBlobs_GoogLeNet, Accuracy)
     blobsNames.push_back("inception_4c/1x1");
     blobsNames.push_back("inception_4c/relu_1x1");
     std::vector<Mat> outs;
-    Mat in = blobFromImage(imread(_tf("googlenet_0.png")));
+    Mat in = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(), Scalar(), false);
+    net.setInput(in, "data");
+    net.forward(outs, blobsNames);
+    CV_Assert(outs.size() == blobsNames.size());
+
+    for (int i = 0; i < blobsNames.size(); i++)
+    {
+        std::string filename = blobsNames[i];
+        std::replace( filename.begin(), filename.end(), '/', '#');
+        Mat ref = blobFromNPY(_tf("googlenet_" + filename + ".npy"));
+
+        normAssert(outs[i], ref, "", 1E-4, 1E-2);
+    }
+}
+
+OCL_TEST(IntermediateBlobs_GoogLeNet, Accuracy)
+{
+    Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false),
+                               findDataFile("dnn/bvlc_googlenet.caffemodel", false));
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    std::vector<String> blobsNames;
+    blobsNames.push_back("conv1/7x7_s2");
+    blobsNames.push_back("conv1/relu_7x7");
+    blobsNames.push_back("inception_4c/1x1");
+    blobsNames.push_back("inception_4c/relu_1x1");
+    std::vector<Mat> outs;
+    Mat in = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(), Scalar(), false);
     net.setInput(in, "data");
     net.forward(outs, blobsNames);
     CV_Assert(outs.size() == blobsNames.size());
@@ -109,7 +158,39 @@ TEST(SeveralCalls_GoogLeNet, Accuracy)
     inpMats.push_back( imread(_tf("googlenet_1.png")) );
     ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty());
 
-    net.setInput(blobFromImages(inpMats), "data");
+    net.setInput(blobFromImages(inpMats, 1.0f, Size(), Scalar(), false), "data");
+    Mat out = net.forward();
+
+    Mat ref = blobFromNPY(_tf("googlenet_prob.npy"));
+    normAssert(out, ref);
+
+    std::vector<String> blobsNames;
+    blobsNames.push_back("conv1/7x7_s2");
+    std::vector<Mat> outs;
+    Mat in = blobFromImage(inpMats[0], 1.0f, Size(), Scalar(), false);
+    net.setInput(in, "data");
+    net.forward(outs, blobsNames);
+    CV_Assert(outs.size() == blobsNames.size());
+
+    ref = blobFromNPY(_tf("googlenet_conv1#7x7_s2.npy"));
+
+    normAssert(outs[0], ref, "", 1E-4, 1E-2);
+}
+
+OCL_TEST(SeveralCalls_GoogLeNet, Accuracy)
+{
+    Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false),
+                               findDataFile("dnn/bvlc_googlenet.caffemodel", false));
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    std::vector<Mat> inpMats;
+    inpMats.push_back( imread(_tf("googlenet_0.png")) );
+    inpMats.push_back( imread(_tf("googlenet_1.png")) );
+    ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty());
+
+    net.setInput(blobFromImages(inpMats, 1.0f, Size(), Scalar(), false), "data");
     Mat out = net.forward();
 
     Mat ref = blobFromNPY(_tf("googlenet_prob.npy"));
@@ -118,7 +199,7 @@ TEST(SeveralCalls_GoogLeNet, Accuracy)
     std::vector<String> blobsNames;
     blobsNames.push_back("conv1/7x7_s2");
     std::vector<Mat> outs;
-    Mat in = blobFromImage(inpMats[0]);
+    Mat in = blobFromImage(inpMats[0], 1.0f, Size(), Scalar(), false);
     net.setInput(in, "data");
     net.forward(outs, blobsNames);
     CV_Assert(outs.size() == blobsNames.size());
index 79f767a..9b2144e 100644
@@ -35,6 +35,28 @@ static void test(LayerParams& params, Mat& input)
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+// Padding
+////////////////////////////////////////////////////////////////////////////////
+TEST(Padding_Halide, Accuracy)
+{
+    static const int kNumRuns = 10;
+    std::vector<int> paddings(8);
+    for (int t = 0; t < kNumRuns; ++t)
+    {
+        for (int i = 0; i < paddings.size(); ++i)
+            paddings[i] = rand() % 5;
+
+        LayerParams lp;
+        lp.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
+        lp.type = "Padding";
+        lp.name = "testLayer";
+
+        Mat input({1 + rand() % 10, 1 + rand() % 10, 1 + rand() % 10, 1 + rand() % 10}, CV_32F);
+        test(lp, input);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
 // Convolution
 ////////////////////////////////////////////////////////////////////////////////
 typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Size, bool> > Convolution;
@@ -107,7 +129,7 @@ TEST_P(Deconvolution, Accuracy)
     Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]);
     bool hasBias = get<6>(GetParam());
 
-    Mat weights({outChannels, inChannels / group, kernel.height, kernel.width}, CV_32F);
+    Mat weights({inChannels, outChannels / group, kernel.height, kernel.width}, CV_32F);
     randu(weights, -1.0f, 1.0f);
 
     LayerParams lp;
@@ -139,7 +161,7 @@ TEST_P(Deconvolution, Accuracy)
 
 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Deconvolution, Combine(
 /*in channels, out channels, group*/
-             Values(Vec3i(6, 4, 1), Vec3i(6, 9, 1)),
+             Values(Vec3i(6, 4, 1), Vec3i(6, 9, 3)),
 /*in size*/  Values(Size(5, 6)),
 /*kernel*/   Values(Size(3, 1), Size(1, 3)),
 /*pad*/      Values(Size(1, 0), Size(0, 1)),
index 9c6e61f..ac36d0e 100644
@@ -98,7 +98,8 @@ void runLayer(Ptr<Layer> layer, std::vector<Mat> &inpBlobs, std::vector<Mat> &ou
 }
 
 
-void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool useCommonInputBlob = true)
+void testLayerUsingCaffeModels(String basename, int targetId = DNN_TARGET_CPU,
+                               bool useCaffeModel = false, bool useCommonInputBlob = true)
 {
     String prototxt = _tf(basename + ".prototxt");
     String caffemodel = _tf(basename + ".caffemodel");
@@ -111,6 +112,9 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool
     Net net = readNetFromCaffe(prototxt, (useCaffeModel) ? caffemodel : String());
     ASSERT_FALSE(net.empty());
 
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(targetId);
+
     Mat inp = blobFromNPY(inpfile);
     Mat ref = blobFromNPY(outfile);
 
@@ -122,47 +126,82 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool
 
 TEST(Layer_Test_Softmax, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_softmax");
+    testLayerUsingCaffeModels("layer_softmax");
+}
+
+OCL_TEST(Layer_Test_Softmax, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_softmax", DNN_TARGET_OPENCL);
 }
 
 TEST(Layer_Test_LRN_spatial, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_lrn_spatial");
+    testLayerUsingCaffeModels("layer_lrn_spatial");
+}
+
+OCL_TEST(Layer_Test_LRN_spatial, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_lrn_spatial", DNN_TARGET_OPENCL);
 }
 
 TEST(Layer_Test_LRN_channels, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_lrn_channels");
+    testLayerUsingCaffeModels("layer_lrn_channels");
+}
+
+OCL_TEST(Layer_Test_LRN_channels, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_lrn_channels", DNN_TARGET_OPENCL);
 }
 
 TEST(Layer_Test_Convolution, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_convolution", true);
+    testLayerUsingCaffeModels("layer_convolution", DNN_TARGET_CPU, true);
+}
+
+OCL_TEST(Layer_Test_Convolution, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_convolution", DNN_TARGET_OPENCL, true);
 }
 
 TEST(Layer_Test_DeConvolution, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_deconvolution", true, false);
+    testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_CPU, true, false);
 }
 
 TEST(Layer_Test_InnerProduct, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_inner_product", true);
+    testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_CPU, true);
+}
+
+OCL_TEST(Layer_Test_InnerProduct, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_OPENCL, true);
 }
 
 TEST(Layer_Test_Pooling_max, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_pooling_max");
+    testLayerUsingCaffeModels("layer_pooling_max");
+}
+
+OCL_TEST(Layer_Test_Pooling_max, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_pooling_max", DNN_TARGET_OPENCL);
 }
 
 TEST(Layer_Test_Pooling_ave, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_pooling_ave");
+    testLayerUsingCaffeModels("layer_pooling_ave");
+}
+
+OCL_TEST(Layer_Test_Pooling_ave, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_pooling_ave", DNN_TARGET_OPENCL);
 }
 
 TEST(Layer_Test_MVN, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_mvn");
+    testLayerUsingCaffeModels("layer_mvn");
 }
 
 void testReshape(const MatShape& inputShape, const MatShape& targetShape,
@@ -207,22 +246,42 @@ TEST(Layer_Test_Reshape, Accuracy)
 
 TEST(Layer_Test_BatchNorm, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_batch_norm", true);
+    testLayerUsingCaffeModels("layer_batch_norm", DNN_TARGET_CPU, true);
 }
 
 TEST(Layer_Test_ReLU, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_relu");
+    testLayerUsingCaffeModels("layer_relu");
+}
+
+OCL_TEST(Layer_Test_ReLU, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_relu", DNN_TARGET_OPENCL);
 }
 
 TEST(Layer_Test_Dropout, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_dropout");
+    testLayerUsingCaffeModels("layer_dropout");
 }
 
 TEST(Layer_Test_Concat, Accuracy)
 {
-     testLayerUsingCaffeModels("layer_concat");
+    testLayerUsingCaffeModels("layer_concat");
+}
+
+OCL_TEST(Layer_Test_Concat, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_concat", DNN_TARGET_OPENCL);
+}
+
+TEST(Layer_Test_Eltwise, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_eltwise");
+}
+
+TEST(Layer_Test_PReLU, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_CPU, true);
 }
 
 //template<typename XMat>
@@ -289,7 +348,8 @@ public:
 
     Layer_LSTM_Test() {}
 
-    void init(const MatShape &inpShape_, const MatShape &outShape_)
+    void init(const MatShape &inpShape_, const MatShape &outShape_,
+              bool produceCellOutput, bool useTimestampDim)
     {
         numInp = total(inpShape_);
         numOut = total(outShape_);
@@ -298,8 +358,15 @@ public:
         Wx = Mat::ones(4 * numOut, numInp, CV_32F);
         b  = Mat::ones(4 * numOut, 1, CV_32F);
 
-        layer = LSTMLayer::create(LayerParams());
-        layer->setWeights(Wh, Wx, b);
+        LayerParams lp;
+        lp.blobs.resize(3);
+        lp.blobs[0] = Wh;
+        lp.blobs[1] = Wx;
+        lp.blobs[2] = b;
+        lp.set<bool>("produce_cell_output", produceCellOutput);
+        lp.set<bool>("use_timestamp_dim", useTimestampDim);
+
+        layer = LSTMLayer::create(lp);
         layer->setOutShape(outShape_);
     }
 };
@@ -312,9 +379,7 @@ TEST_F(Layer_LSTM_Test, get_set_test)
     MatShape inpResShape = concat(shape(TN), inpShape);
     MatShape outResShape = concat(shape(TN), outShape);
 
-    init(inpShape, outShape);
-    layer->setProduceCellOutput(true);
-    layer->setUseTimstampsDim(false);
+    init(inpShape, outShape, true, false);
     layer->setOutShape(outShape);
 
     Mat C((int)outResShape.size(), &outResShape[0], CV_32F);
@@ -344,12 +409,12 @@ TEST_F(Layer_LSTM_Test, get_set_test)
 
 TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
 {
-    Ptr<LSTMLayer> layer = LSTMLayer::create(LayerParams());
-
-    Mat Wx = blobFromNPY(_tf("lstm.prototxt.w_0.npy"));
-    Mat Wh = blobFromNPY(_tf("lstm.prototxt.w_2.npy"));
-    Mat b  = blobFromNPY(_tf("lstm.prototxt.w_1.npy"));
-    layer->setWeights(Wh, Wx, b);
+    LayerParams lp;
+    lp.blobs.resize(3);
+    lp.blobs[0] = blobFromNPY(_tf("lstm.prototxt.w_2.npy"));  // Wh
+    lp.blobs[1] = blobFromNPY(_tf("lstm.prototxt.w_0.npy"));  // Wx
+    lp.blobs[2] = blobFromNPY(_tf("lstm.prototxt.w_1.npy"));  // bias
+    Ptr<LSTMLayer> layer = LSTMLayer::create(lp);
 
     Mat inp = blobFromNPY(_tf("recurrent.input.npy"));
     std::vector<Mat> inputs(1, inp), outputs;
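
The LSTM tests above now feed weights through LayerParams::blobs instead of the removed setWeights() call; the expected blob order is Wh, Wx, bias. A minimal sketch with made-up sizes:

#include <opencv2/dnn.hpp>

int main()
{
    using namespace cv;
    using namespace cv::dnn;
    const int numInp = 8, numOut = 4;  // made-up sizes
    LayerParams lp;
    lp.blobs.resize(3);
    lp.blobs[0] = Mat::ones(4 * numOut, numOut, CV_32F);  // Wh (recurrent)
    lp.blobs[1] = Mat::ones(4 * numOut, numInp, CV_32F);  // Wx (input)
    lp.blobs[2] = Mat::ones(4 * numOut, 1, CV_32F);       // bias
    Ptr<LSTMLayer> lstm = LSTMLayer::create(lp);
    return 0;
}
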
index 57227ff..e5b94ef 100644
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 
-// Copyright (C) 2016, Intel Corporation, all rights reserved.
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 
 /*
@@ -74,14 +74,15 @@ static std::string path(const std::string& file)
     return findDataFile("dnn/tensorflow/" + file, false);
 }
 
-static void runTensorFlowNet(const std::string& prefix,
+static void runTensorFlowNet(const std::string& prefix, bool hasText = false,
                              double l1 = 1e-5, double lInf = 1e-4)
 {
     std::string netPath = path(prefix + "_net.pb");
+    std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : "");
     std::string inpPath = path(prefix + "_in.npy");
     std::string outPath = path(prefix + "_out.npy");
 
-    Net net = readNetFromTensorflow(netPath);
+    Net net = readNetFromTensorflow(netPath, netConfig);
 
     cv::Mat input = blobFromNPY(inpPath);
     cv::Mat target = blobFromNPY(outPath);
@@ -103,6 +104,7 @@ TEST(Test_TensorFlow, padding)
 {
     runTensorFlowNet("padding_same");
     runTensorFlowNet("padding_valid");
+    runTensorFlowNet("spatial_padding");
 }
 
 TEST(Test_TensorFlow, eltwise_add_mul)
@@ -119,6 +121,7 @@ TEST(Test_TensorFlow, batch_norm)
 {
     runTensorFlowNet("batch_norm");
     runTensorFlowNet("fused_batch_norm");
+    runTensorFlowNet("batch_norm_text", true);
 }
 
 TEST(Test_TensorFlow, pooling)
@@ -146,21 +149,71 @@ TEST(Test_TensorFlow, defun)
 TEST(Test_TensorFlow, reshape)
 {
     runTensorFlowNet("shift_reshape_no_reorder");
+    runTensorFlowNet("reshape_reduce");
+    runTensorFlowNet("flatten", true);
 }
 
 TEST(Test_TensorFlow, fp16)
 {
     const float l1 = 1e-3;
     const float lInf = 1e-2;
-    runTensorFlowNet("fp16_single_conv", l1, lInf);
-    runTensorFlowNet("fp16_deconvolution", l1, lInf);
-    runTensorFlowNet("fp16_max_pool_odd_same", l1, lInf);
-    runTensorFlowNet("fp16_padding_valid", l1, lInf);
-    runTensorFlowNet("fp16_eltwise_add_mul", l1, lInf);
-    runTensorFlowNet("fp16_max_pool_odd_valid", l1, lInf);
-    runTensorFlowNet("fp16_pad_and_concat", l1, lInf);
-    runTensorFlowNet("fp16_max_pool_even", l1, lInf);
-    runTensorFlowNet("fp16_padding_same", l1, lInf);
+    runTensorFlowNet("fp16_single_conv", false, l1, lInf);
+    runTensorFlowNet("fp16_deconvolution", false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_odd_same", false, l1, lInf);
+    runTensorFlowNet("fp16_padding_valid", false, l1, lInf);
+    runTensorFlowNet("fp16_eltwise_add_mul", false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_odd_valid", false, l1, lInf);
+    runTensorFlowNet("fp16_pad_and_concat", false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_even", false, l1, lInf);
+    runTensorFlowNet("fp16_padding_same", false, l1, lInf);
+}
+
+TEST(Test_TensorFlow, MobileNet_SSD)
+{
+    std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false);
+    std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false);
+    std::string imgPath = findDataFile("dnn/street.png", false);
+
+    Mat inp;
+    resize(imread(imgPath), inp, Size(300, 300));
+    inp = blobFromImage(inp, 1.0f / 127.5, Size(), Scalar(127.5, 127.5, 127.5), true);
+
+    std::vector<String> outNames(3);
+    outNames[0] = "concat";
+    outNames[1] = "concat_1";
+    outNames[2] = "detection_out";
+
+    std::vector<Mat> target(outNames.size());
+    for (int i = 0; i < outNames.size(); ++i)
+    {
+        std::string path = findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco." + outNames[i] + ".npy", false);
+        target[i] = blobFromNPY(path);
+    }
+
+    Net net = readNetFromTensorflow(netPath, netConfig);
+    net.setInput(inp);
+
+    std::vector<Mat> output;
+    net.forward(output, outNames);
+
+    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1));
+    normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 2e-4);
+    normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2);
+}
+
+TEST(Test_TensorFlow, lstm)
+{
+    runTensorFlowNet("lstm", true);
+}
+
+TEST(Test_TensorFlow, split)
+{
+    runTensorFlowNet("split_equals");
+}
+
+TEST(Test_TensorFlow, resize_nearest_neighbor)
+{
+    runTensorFlowNet("resize_nearest_neighbor");
 }
 
 }
index fa444ed..d83c203 100644
@@ -44,6 +44,7 @@
 #include "test_precomp.hpp"
 #include "npy_blob.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
+#include <opencv2/ts/ocl_test.hpp>
 
 namespace cvtest
 {
@@ -70,7 +71,7 @@ TEST(Torch_Importer, simple_read)
     ASSERT_FALSE(net.empty());
 }
 
-static void runTorchNet(String prefix, String outLayerName = "",
+static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String outLayerName = "",
                         bool check2ndBlob = false, bool isBinary = false)
 {
     String suffix = (isBinary) ? ".dat" : ".txt";
@@ -78,6 +79,9 @@ static void runTorchNet(String prefix, String outLayerName = "",
     Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary);
     ASSERT_FALSE(net.empty());
 
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(targetId);
+
     Mat inp, outRef;
     ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
     ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );
@@ -103,9 +107,19 @@ TEST(Torch_Importer, run_convolution)
     runTorchNet("net_conv");
 }
 
+OCL_TEST(Torch_Importer, run_convolution)
+{
+    runTorchNet("net_conv", DNN_TARGET_OPENCL);
+}
+
 TEST(Torch_Importer, run_pool_max)
 {
-    runTorchNet("net_pool_max", "", true);
+    runTorchNet("net_pool_max", DNN_TARGET_CPU, "", true);
+}
+
+OCL_TEST(Torch_Importer, run_pool_max)
+{
+    runTorchNet("net_pool_max", DNN_TARGET_OPENCL, "", true);
 }
 
 TEST(Torch_Importer, run_pool_ave)
@@ -113,12 +127,17 @@ TEST(Torch_Importer, run_pool_ave)
     runTorchNet("net_pool_ave");
 }
 
+OCL_TEST(Torch_Importer, run_pool_ave)
+{
+    runTorchNet("net_pool_ave", DNN_TARGET_OPENCL);
+}
+
 TEST(Torch_Importer, run_reshape)
 {
     runTorchNet("net_reshape");
     runTorchNet("net_reshape_batch");
     runTorchNet("net_reshape_single_sample");
-    runTorchNet("net_reshape_channels", "", false, true);
+    runTorchNet("net_reshape_channels", DNN_TARGET_CPU, "", false, true);
 }
 
 TEST(Torch_Importer, run_linear)
@@ -128,13 +147,19 @@ TEST(Torch_Importer, run_linear)
 
 TEST(Torch_Importer, run_paralel)
 {
-    runTorchNet("net_parallel", "l5_torchMerge");
+    runTorchNet("net_parallel", DNN_TARGET_CPU, "l5_torchMerge");
 }
 
 TEST(Torch_Importer, run_concat)
 {
-    runTorchNet("net_concat", "l5_torchMerge");
-    runTorchNet("net_depth_concat", "", false, true);
+    runTorchNet("net_concat", DNN_TARGET_CPU, "l5_torchMerge");
+    runTorchNet("net_depth_concat", DNN_TARGET_CPU, "", false, true);
+}
+
+OCL_TEST(Torch_Importer, run_concat)
+{
+    runTorchNet("net_concat", DNN_TARGET_OPENCL, "l5_torchMerge");
+    runTorchNet("net_depth_concat", DNN_TARGET_OPENCL, "", false, true);
 }
 
 TEST(Torch_Importer, run_deconv)
@@ -163,31 +188,49 @@ TEST(Torch_Importer, net_softmax)
     runTorchNet("net_softmax_spatial");
 }
 
+OCL_TEST(Torch_Importer, net_softmax)
+{
+    runTorchNet("net_softmax", DNN_TARGET_OPENCL);
+    runTorchNet("net_softmax_spatial", DNN_TARGET_OPENCL);
+}
+
 TEST(Torch_Importer, net_logsoftmax)
 {
     runTorchNet("net_logsoftmax");
     runTorchNet("net_logsoftmax_spatial");
 }
 
+OCL_TEST(Torch_Importer, net_logsoftmax)
+{
+    runTorchNet("net_logsoftmax", DNN_TARGET_OPENCL);
+    runTorchNet("net_logsoftmax_spatial", DNN_TARGET_OPENCL);
+}
+
 TEST(Torch_Importer, net_lp_pooling)
 {
-    runTorchNet("net_lp_pooling_square", "", false, true);
-    runTorchNet("net_lp_pooling_power", "", false, true);
+    runTorchNet("net_lp_pooling_square", DNN_TARGET_CPU, "", false, true);
+    runTorchNet("net_lp_pooling_power", DNN_TARGET_CPU, "", false, true);
 }
 
 TEST(Torch_Importer, net_conv_gemm_lrn)
 {
-    runTorchNet("net_conv_gemm_lrn", "", false, true);
+    runTorchNet("net_conv_gemm_lrn", DNN_TARGET_CPU, "", false, true);
 }
 
 TEST(Torch_Importer, net_inception_block)
 {
-    runTorchNet("net_inception_block", "", false, true);
+    runTorchNet("net_inception_block", DNN_TARGET_CPU, "", false, true);
 }
 
 TEST(Torch_Importer, net_normalize)
 {
-    runTorchNet("net_normalize", "", false, true);
+    runTorchNet("net_normalize", DNN_TARGET_CPU, "", false, true);
+}
+
+TEST(Torch_Importer, net_padding)
+{
+    runTorchNet("net_padding", DNN_TARGET_CPU, "", false, true);
+    runTorchNet("net_spatial_zero_padding", DNN_TARGET_CPU, "", false, true);
 }
 
 TEST(Torch_Importer, ENet_accuracy)
@@ -239,6 +282,62 @@ TEST(Torch_Importer, OpenFace_accuracy)
     normAssert(out, outRef);
 }
 
+OCL_TEST(Torch_Importer, OpenFace_accuracy)
+{
+    const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false);
+    Net net = readNetFromTorch(model);
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    Mat sample = imread(findDataFile("cv/shared/lena.png", false));
+    Mat sampleF32(sample.size(), CV_32FC3);
+    sample.convertTo(sampleF32, sampleF32.type());
+    sampleF32 /= 255;
+    resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST);
+
+    Mat inputBlob = blobFromImage(sampleF32);
+
+    net.setInput(inputBlob);
+    Mat out = net.forward();
+
+    Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true);
+    normAssert(out, outRef);
+}
+
+OCL_TEST(Torch_Importer, ENet_accuracy)
+{
+    Net net;
+    {
+        const string model = findDataFile("dnn/Enet-model-best.net", false);
+        Ptr<Importer> importer = createTorchImporter(model, true);
+        ASSERT_TRUE(importer != NULL);
+        importer->populateNet(net);
+    }
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    Mat sample = imread(_tf("street.png", false));
+    Mat inputBlob = blobFromImage(sample, 1./255);
+
+    net.setInput(inputBlob, "");
+    Mat out = net.forward();
+    Mat ref = blobFromNPY(_tf("torch_enet_prob.npy", false));
+    // Due to numerical instability in Pooling-Unpooling layers (index jittering),
+    // the thresholds for ENet have to be relaxed. Accuracy of the results was
+    // checked on the Cityscapes dataset; the difference in mIoU against Torch is 10E-4%.
+    normAssert(ref, out, "", 0.00044, 0.44);
+
+    const int N = 3;
+    for (int i = 0; i < N; i++)
+    {
+        net.setInput(inputBlob, "");
+        Mat out = net.forward();
+        normAssert(ref, out, "", 0.00044, 0.44);
+    }
+}
+
 }
 
 #endif
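
The OCL_TEST variants added above differ from their CPU twins only in the target passed to runTorchNet. A minimal sketch of that selection pattern, using the same calls the patch adds (the helper name here is illustrative):

#include <opencv2/dnn.hpp>

// Illustrative helper; the backend/target calls are the ones this patch adds.
cv::Mat forwardOnTarget(cv::dnn::Net& net, const cv::Mat& inputBlob, int targetId)
{
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(targetId); // DNN_TARGET_CPU or DNN_TARGET_OPENCL
    net.setInput(inputBlob);
    return net.forward();
}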
index 4fba298..99d7188 100644 (file)
@@ -38,13 +38,17 @@ file(GLOB highgui_ext_hdrs
 list(REMOVE_ITEM highgui_ext_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/highgui_winrt.hpp")
 
 if(HAVE_QT5)
-  set(CMAKE_AUTOMOC ON)
+  # "Automoc" doesn't work properly with opencv_world build, use QT5_WRAP_CPP() directly
+  #set(CMAKE_AUTOMOC ON)
+
   set(CMAKE_INCLUDE_CURRENT_DIR ON)
 
   QT5_ADD_RESOURCES(_RCC_OUTFILES ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.qrc)
+  QT5_WRAP_CPP(_MOC_OUTFILES ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.h)
   list(APPEND highgui_srcs
        ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.cpp
        ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.h
+       ${_MOC_OUTFILES}
        ${_RCC_OUTFILES})
 
   foreach(dt5_dep Core Gui Widgets Test Concurrent)
index 3c7edf0..3817c41 100644 (file)
@@ -375,6 +375,9 @@ decode_rle4_bad: ;
                                                 gray_palette[code] );
 
                         line_end_flag = y - prev_y;
+
+                        if( y >= m_height )
+                            break;
                     }
                     else if( code > 2 ) // absolute mode
                     {
@@ -478,8 +481,10 @@ decode_rle8_bad: ;
 
                 if( !color )
                     icvCvt_BGRA2Gray_8u_C4C1R( src, 0, data, 0, cvSize(m_width,1) );
-                else
-                    icvCvt_BGRA2BGR_8u_C4C3R( src, 0, data, 0, cvSize(m_width,1) );
+                else if( img.channels() == 3 )
+                    icvCvt_BGRA2BGR_8u_C4C3R(src, 0, data, 0, cvSize(m_width, 1));
+                else if( img.channels() == 4 )
+                    memcpy(data, src, m_width * 4);
             }
             result = true;
             break;
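
The first hunk above guards the RLE4 delta handling against a row index that runs past the image height. A minimal self-contained illustration of that guard, with toy row advances rather than real BMP RLE codes:

#include <vector>
#include <cstdio>

int main()
{
    const int height = 4;
    // Toy data: the third advance would overshoot the last scanline.
    const std::vector<int> rowAdvances = { 1, 1, 3, 1 };
    int y = 0;
    for (size_t i = 0; i < rowAdvances.size(); ++i)
    {
        y += rowAdvances[i];
        if (y >= height)   // the new check: stop instead of writing
            break;         // past the last scanline
    }
    std::printf("stopped at row %d of %d\n", y, height);
    return 0;
}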
index 61f4c64..0aa64e3 100644 (file)
 \****************************************************************************************/
 
 #include "precomp.hpp"
+
+#ifdef HAVE_TIFF
 #include "grfmt_tiff.hpp"
-#include <opencv2/imgproc.hpp>
 #include <limits>
 
+// TODO FIXIT: conflicting declarations for common types like int64/uint64
+namespace tiff_dummy_namespace {
+#include "tiff.h"
+#include "tiffio.h"
+}
+using namespace tiff_dummy_namespace;
+
 namespace cv
 {
-static const char fmtSignTiffII[] = "II\x2a\x00";
 
-#ifdef HAVE_TIFF
 
+static const char fmtSignTiffII[] = "II\x2a\x00";
 static const char fmtSignTiffMM[] = "MM\x00\x2a";
 
-#include "tiff.h"
-#include "tiffio.h"
-
 static int grfmt_tiff_err_handler_init = 0;
 static void GrFmtSilentTIFFErrorHandler( const char*, const char*, va_list ) {}
 
@@ -119,19 +123,24 @@ ImageDecoder TiffDecoder::newDecoder() const
 
 class TiffDecoderBufHelper
 {
+    Mat& m_buf;
+    size_t& m_buf_pos;
 public:
+    TiffDecoderBufHelper(Mat& buf, size_t& buf_pos) :
+        m_buf(buf), m_buf_pos(buf_pos)
+    {}
     static tmsize_t read( thandle_t handle, void* buffer, tmsize_t n )
     {
-        TiffDecoder *decoder = reinterpret_cast<TiffDecoder*>(handle);
-        const Mat& buf = decoder->m_buf;
+        TiffDecoderBufHelper *helper = reinterpret_cast<TiffDecoderBufHelper*>(handle);
+        const Mat& buf = helper->m_buf;
         const tmsize_t size = buf.cols*buf.rows*buf.elemSize();
-        tmsize_t pos = decoder->m_buf_pos;
+        tmsize_t pos = helper->m_buf_pos;
         if ( n > (size - pos) )
         {
             n = size - pos;
         }
         memcpy(buffer, buf.ptr() + pos, n);
-        decoder->m_buf_pos += n;
+        helper->m_buf_pos += n;
         return n;
     }
 
@@ -143,10 +152,10 @@ public:
 
     static toff_t seek( thandle_t handle, toff_t offset, int whence )
     {
-        TiffDecoder *decoder = reinterpret_cast<TiffDecoder*>(handle);
-        const Mat& buf = decoder->m_buf;
+        TiffDecoderBufHelper *helper = reinterpret_cast<TiffDecoderBufHelper*>(handle);
+        const Mat& buf = helper->m_buf;
         const toff_t size = buf.cols*buf.rows*buf.elemSize();
-        toff_t new_pos = decoder->m_buf_pos;
+        toff_t new_pos = helper->m_buf_pos;
         switch (whence)
         {
             case SEEK_SET:
@@ -160,14 +169,14 @@ public:
                 break;
         }
         new_pos = std::min(new_pos, size);
-        decoder->m_buf_pos = (size_t)new_pos;
+        helper->m_buf_pos = (size_t)new_pos;
         return new_pos;
     }
 
     static int map( thandle_t handle, void** base, toff_t* size )
     {
-        TiffDecoder *decoder = reinterpret_cast<TiffDecoder*>(handle);
-        Mat& buf = decoder->m_buf;
+        TiffDecoderBufHelper *helper = reinterpret_cast<TiffDecoderBufHelper*>(handle);
+        Mat& buf = helper->m_buf;
         *base = buf.ptr();
         *size = buf.cols*buf.rows*buf.elemSize();
         return 0;
@@ -175,14 +184,15 @@ public:
 
     static toff_t size( thandle_t handle )
     {
-        TiffDecoder *decoder = reinterpret_cast<TiffDecoder*>(handle);
-        const Mat& buf = decoder->m_buf;
+        TiffDecoderBufHelper *helper = reinterpret_cast<TiffDecoderBufHelper*>(handle);
+        const Mat& buf = helper->m_buf;
         return buf.cols*buf.rows*buf.elemSize();
     }
 
-    static int close( thandle_t /*handle*/ )
+    static int close( thandle_t handle )
     {
-        // Do nothing.
+        TiffDecoderBufHelper *helper = reinterpret_cast<TiffDecoderBufHelper*>(handle);
+        delete helper;
         return 0;
     }
 };
@@ -199,7 +209,8 @@ bool TiffDecoder::readHeader()
         if ( !m_buf.empty() )
         {
             m_buf_pos = 0;
-            tif = TIFFClientOpen( "", "r", reinterpret_cast<thandle_t>(this), &TiffDecoderBufHelper::read,
+            TiffDecoderBufHelper* buf_helper = new TiffDecoderBufHelper(this->m_buf, this->m_buf_pos);
+            tif = TIFFClientOpen( "", "r", reinterpret_cast<thandle_t>(buf_helper), &TiffDecoderBufHelper::read,
                                   &TiffDecoderBufHelper::write, &TiffDecoderBufHelper::seek,
                                   &TiffDecoderBufHelper::close, &TiffDecoderBufHelper::size,
                                   &TiffDecoderBufHelper::map, /*unmap=*/0 );
@@ -548,8 +559,6 @@ bool TiffDecoder::readHdrData(Mat& img)
     return true;
 }
 
-#endif
-
 //////////////////////////////////////////////////////////////////////////////////////////
 
 TiffEncoder::TiffEncoder()
@@ -569,11 +578,7 @@ ImageEncoder TiffEncoder::newEncoder() const
 
 bool TiffEncoder::isFormatSupported( int depth ) const
 {
-#ifdef HAVE_TIFF
     return depth == CV_8U || depth == CV_16U || depth == CV_32F;
-#else
-    return depth == CV_8U || depth == CV_16U;
-#endif
 }
 
 void  TiffEncoder::writeTag( WLByteStream& strm, TiffTag tag,
@@ -586,8 +591,6 @@ void  TiffEncoder::writeTag( WLByteStream& strm, TiffTag tag,
     strm.putDWord( value );
 }
 
-#ifdef HAVE_TIFF
-
 class TiffEncoderBufHelper
 {
 public:
@@ -854,204 +857,20 @@ bool TiffEncoder::writeHdr(const Mat& _img)
     return true;
 }
 
-#endif
-
-#ifdef HAVE_TIFF
 bool  TiffEncoder::write( const Mat& img, const std::vector<int>& params)
-#else
-bool  TiffEncoder::write( const Mat& img, const std::vector<int>& /*params*/)
-#endif
 {
     int depth = img.depth();
-#ifdef HAVE_TIFF
+
     if(img.type() == CV_32FC3)
     {
-        return writeHdr(img);
+        return writeHdr(img); // TODO Rename
     }
-#endif
 
-    if (depth != CV_8U && depth != CV_16U)
-        return false;
+    CV_Assert(depth == CV_8U || depth == CV_16U);
 
-#ifdef HAVE_TIFF
     return writeLibTiff(img, params);
-#else
-    int channels = img.channels();
-    int width = img.cols, height = img.rows;
-    int bytesPerChannel = depth == CV_8U ? 1 : 2;
-    int fileStep = width * channels * bytesPerChannel;
-
-    WLByteStream strm;
-
-    if( m_buf )
-    {
-        if( !strm.open(*m_buf) )
-            return false;
-    }
-    else
-    {
-      if( !strm.open(m_filename) )
-          return false;
-    }
-
-    int rowsPerStrip = (1 << 13)/fileStep;
-
-    if( rowsPerStrip < 1 )
-        rowsPerStrip = 1;
-
-    if( rowsPerStrip > height )
-        rowsPerStrip = height;
-
-    int i, stripCount = (height + rowsPerStrip - 1) / rowsPerStrip;
-
-    if( m_buf )
-        m_buf->reserve( alignSize(stripCount*8 + fileStep*height + 256, 256) );
-
-/*#if defined _DEBUG || !defined _WIN32
-    int uncompressedRowSize = rowsPerStrip * fileStep;
-#endif*/
-    int directoryOffset = 0;
-
-    AutoBuffer<int> stripOffsets(stripCount);
-    AutoBuffer<short> stripCounts(stripCount);
-    AutoBuffer<uchar> _buffer(fileStep+32);
-    uchar* buffer = _buffer;
-    int  stripOffsetsOffset = 0;
-    int  stripCountsOffset = 0;
-    int  bitsPerSample = 8 * bytesPerChannel;
-    int  y = 0;
-
-    strm.putBytes( fmtSignTiffII, 4 );
-    strm.putDWord( directoryOffset );
-
-    // write an image data first (the most reasonable way
-    // for compressed images)
-    for( i = 0; i < stripCount; i++ )
-    {
-        int limit = y + rowsPerStrip;
-
-        if( limit > height )
-            limit = height;
-
-        stripOffsets[i] = strm.getPos();
-
-        for( ; y < limit; y++ )
-        {
-            if( channels == 3 )
-            {
-                if (depth == CV_8U)
-                    icvCvt_BGR2RGB_8u_C3R( img.ptr(y), 0, buffer, 0, cvSize(width,1) );
-                else
-                    icvCvt_BGR2RGB_16u_C3R( img.ptr<ushort>(y), 0, (ushort*)buffer, 0, cvSize(width,1) );
-            }
-            else
-            {
-              if( channels == 4 )
-              {
-                if (depth == CV_8U)
-                    icvCvt_BGRA2RGBA_8u_C4R( img.ptr(y), 0, buffer, 0, cvSize(width,1) );
-                else
-                    icvCvt_BGRA2RGBA_16u_C4R( img.ptr<ushort>(y), 0, (ushort*)buffer, 0, cvSize(width,1) );
-              }
-            }
-
-            strm.putBytes( channels > 1 ? buffer : img.ptr(y), fileStep );
-        }
-
-        stripCounts[i] = (short)(strm.getPos() - stripOffsets[i]);
-        /*assert( stripCounts[i] == uncompressedRowSize ||
-                stripCounts[i] < uncompressedRowSize &&
-                i == stripCount - 1);*/
-    }
-
-    if( stripCount > 2 )
-    {
-        stripOffsetsOffset = strm.getPos();
-        for( i = 0; i < stripCount; i++ )
-            strm.putDWord( stripOffsets[i] );
-
-        stripCountsOffset = strm.getPos();
-        for( i = 0; i < stripCount; i++ )
-            strm.putWord( stripCounts[i] );
-    }
-    else if(stripCount == 2)
-    {
-        stripOffsetsOffset = strm.getPos();
-        for (i = 0; i < stripCount; i++)
-        {
-            strm.putDWord (stripOffsets [i]);
-        }
-        stripCountsOffset = stripCounts [0] + (stripCounts [1] << 16);
-    }
-    else
-    {
-        stripOffsetsOffset = stripOffsets[0];
-        stripCountsOffset = stripCounts[0];
-    }
-
-    if( channels > 1 )
-    {
-        int bitsPerSamplePos = strm.getPos();
-        strm.putWord(bitsPerSample);
-        strm.putWord(bitsPerSample);
-        strm.putWord(bitsPerSample);
-        if( channels == 4 )
-            strm.putWord(bitsPerSample);
-        bitsPerSample = bitsPerSamplePos;
-    }
-
-    directoryOffset = strm.getPos();
-
-    // write header
-    strm.putWord( 9 );
-
-    /* warning: specification 5.0 of Tiff want to have tags in
-       ascending order. This is a non-fatal error, but this cause
-       warning with some tools. So, keep this in ascending order */
-
-    writeTag( strm, TIFF_TAG_WIDTH, TIFF_TYPE_LONG, 1, width );
-    writeTag( strm, TIFF_TAG_HEIGHT, TIFF_TYPE_LONG, 1, height );
-    writeTag( strm, TIFF_TAG_BITS_PER_SAMPLE,
-              TIFF_TYPE_SHORT, channels, bitsPerSample );
-    writeTag( strm, TIFF_TAG_COMPRESSION, TIFF_TYPE_LONG, 1, TIFF_UNCOMP );
-    writeTag( strm, TIFF_TAG_PHOTOMETRIC, TIFF_TYPE_SHORT, 1, channels > 1 ? 2 : 1 );
-
-    writeTag( strm, TIFF_TAG_STRIP_OFFSETS, TIFF_TYPE_LONG,
-              stripCount, stripOffsetsOffset );
-
-    writeTag( strm, TIFF_TAG_SAMPLES_PER_PIXEL, TIFF_TYPE_SHORT, 1, channels );
-    writeTag( strm, TIFF_TAG_ROWS_PER_STRIP, TIFF_TYPE_LONG, 1, rowsPerStrip );
-
-    writeTag( strm, TIFF_TAG_STRIP_COUNTS,
-              stripCount > 1 ? TIFF_TYPE_SHORT : TIFF_TYPE_LONG,
-              stripCount, stripCountsOffset );
+}
 
-    strm.putDWord(0);
-    strm.close();
+} // namespace
 
-    if( m_buf )
-    {
-        (*m_buf)[4] = (uchar)directoryOffset;
-        (*m_buf)[5] = (uchar)(directoryOffset >> 8);
-        (*m_buf)[6] = (uchar)(directoryOffset >> 16);
-        (*m_buf)[7] = (uchar)(directoryOffset >> 24);
-    }
-    else
-    {
-        // write directory offset
-        FILE* f = fopen( m_filename.c_str(), "r+b" );
-        buffer[0] = (uchar)directoryOffset;
-        buffer[1] = (uchar)(directoryOffset >> 8);
-        buffer[2] = (uchar)(directoryOffset >> 16);
-        buffer[3] = (uchar)(directoryOffset >> 24);
-
-        fseek( f, 4, SEEK_SET );
-        fwrite( buffer, 1, 4, f );
-        fclose(f);
-    }
-
-    return true;
 #endif
-}
-
-}
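
The refactoring above moves the libtiff client state out of TiffDecoder into a heap-allocated helper that is deleted in close(), so the handle can no longer dangle once the decoder goes away. A minimal sketch of the same TIFFClientOpen pattern over an in-memory buffer, assuming libtiff is available; writing and error handling are elided:

#include <tiffio.h>
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <vector>

struct MemState { std::vector<unsigned char>* buf; size_t pos; };

static tmsize_t memRead(thandle_t h, void* dst, tmsize_t n)
{
    MemState* s = reinterpret_cast<MemState*>(h);
    tmsize_t avail = (tmsize_t)(s->buf->size() - s->pos);
    if (n > avail) n = avail;
    std::memcpy(dst, s->buf->data() + s->pos, (size_t)n);
    s->pos += (size_t)n;
    return n;
}
static tmsize_t memWrite(thandle_t, void*, tmsize_t) { return 0; } // read-only
static toff_t memSeek(thandle_t h, toff_t off, int whence)
{
    MemState* s = reinterpret_cast<MemState*>(h);
    toff_t p = (whence == SEEK_SET) ? off
             : (whence == SEEK_CUR) ? (toff_t)s->pos + off
                                    : (toff_t)s->buf->size() + off;
    s->pos = (size_t)std::min<toff_t>(p, (toff_t)s->buf->size());
    return (toff_t)s->pos;
}
static toff_t memSize(thandle_t h)
{
    return (toff_t)reinterpret_cast<MemState*>(h)->buf->size();
}
static int memClose(thandle_t h)
{
    delete reinterpret_cast<MemState*>(h); // same ownership fix as the patch
    return 0;
}

TIFF* openFromMemory(std::vector<unsigned char>& encoded)
{
    MemState* s = new MemState{ &encoded, 0 };
    return TIFFClientOpen("mem", "r", reinterpret_cast<thandle_t>(s),
                          memRead, memWrite, memSeek, memClose, memSize,
                          /*map=*/0, /*unmap=*/0);
}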
index cd51cad..95c6179 100644 (file)
@@ -45,6 +45,8 @@
 
 #include "grfmt_base.hpp"
 
+#ifdef HAVE_TIFF
+
 namespace cv
 {
 
@@ -87,12 +89,7 @@ enum TiffFieldType
 };
 
 
-#ifdef HAVE_TIFF
-
 // libtiff based TIFF codec
-
-class TiffDecoderBufHelper;
-
 class TiffDecoder : public BaseImageDecoder
 {
 public:
@@ -109,9 +106,6 @@ public:
     ImageDecoder newDecoder() const;
 
 protected:
-
-    friend class TiffDecoderBufHelper;
-
     void* m_tif;
     int normalizeChannelsNumber(int channels) const;
     bool readHdrData(Mat& img);
@@ -123,8 +117,6 @@ private:
     TiffDecoder& operator=(const TiffDecoder &); // assign disabled
 };
 
-#endif
-
 // ... and writer
 class TiffEncoder : public BaseImageEncoder
 {
@@ -144,8 +136,14 @@ protected:
 
     bool writeLibTiff( const Mat& img, const std::vector<int>& params );
     bool writeHdr( const Mat& img );
+
+private:
+    TiffEncoder(const TiffEncoder &); // copy disabled
+    TiffEncoder& operator=(const TiffEncoder &); // assign disabled
 };
 
 }
 
+#endif // HAVE_TIFF
+
 #endif/*_GRFMT_TIFF_H_*/
index 5ee4ca3..efbd52e 100644 (file)
@@ -147,8 +147,8 @@ struct ImageCodecInitializer
         encoders.push_back( makePtr<PxMEncoder>() );
     #ifdef HAVE_TIFF
         decoders.push_back( makePtr<TiffDecoder>() );
-    #endif
         encoders.push_back( makePtr<TiffEncoder>() );
+    #endif
     #ifdef HAVE_PNG
         decoders.push_back( makePtr<PngDecoder>() );
         encoders.push_back( makePtr<PngEncoder>() );
index 87d0711..5cfb616 100644 (file)
@@ -1,3 +1,3 @@
 set(the_description "Image Processing")
 ocv_add_dispatched_file(accum SSE2 AVX NEON)
-ocv_define_module(imgproc opencv_core WRAP java python)
+ocv_define_module(imgproc opencv_core WRAP java python js)
index 5a127fb..076af52 100644 (file)
@@ -4066,6 +4066,88 @@ border of the containing Mat element.
  */
 CV_EXPORTS_W RotatedRect fitEllipse( InputArray points );
 
+/** @brief Fits an ellipse around a set of 2D points.
+
+ The function calculates the ellipse that fits a set of 2D points.
+ It returns the rotated rectangle in which the ellipse is inscribed.
+ The Approximate Mean Square (AMS) method proposed by @cite Taubin1991 is used.
+
+ For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$,
+ which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$.
+ However, to specify an ellipse, all that is needed is five numbers: the major and minor axis lengths \f$ (a,b) \f$,
+ the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines,
+ quadratic, parabolic and hyperbolic curves as well as elliptical curves as possible fits.
+ If the fit is found to be a parabolic or hyperbolic function then the standard fitEllipse method is used.
+ The AMS method restricts the fit to parabolic, hyperbolic and elliptical curves
+ by imposing the condition that \f$ A^T ( D_x^T D_x  +   D_y^T D_y) A = 1 \f$ where
+ the matrices \f$ D_x \f$ and \f$ D_y \f$ are the partial derivatives of the design matrix \f$ D \f$ with
+ respect to x and y. The matrices are formed row by row applying the following to
+ each of the points in the set:
+ \f{align*}{
+ D(i,:)&=\left\{x_i^2, x_i y_i, y_i^2, x_i, y_i, 1\right\} &
+ D_x(i,:)&=\left\{2 x_i,y_i,0,1,0,0\right\} &
+ D_y(i,:)&=\left\{0,x_i,2 y_i,0,1,0\right\}
+ \f}
+ The AMS method minimizes the cost function
+ \f{equation*}{
+ \epsilon ^2=\frac{ A^T D^T D A }{ A^T (D_x^T D_x +  D_y^T D_y) A }
+ \f}
+
+ The minimum cost is found by solving the generalized eigenvalue problem.
+
+ \f{equation*}{
+ D^T D A = \lambda  \left( D_x^T D_x +  D_y^T D_y\right) A
+ \f}
+
+ @param points Input 2D point set, stored in std::vector\<\> or Mat
+ */
+CV_EXPORTS_W RotatedRect fitEllipseAMS( InputArray points );
+
+
+/** @brief Fits an ellipse around a set of 2D points.
+
+ The function calculates the ellipse that fits a set of 2D points.
+ It returns the rotated rectangle in which the ellipse is inscribed.
+ The Direct least squares (Direct) method by @cite Fitzgibbon1999 is used.
+
+ For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$,
+ which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$.
+ However, to specify an ellipse, all that is needed is five numbers: the major and minor axis lengths \f$ (a,b) \f$,
+ the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines,
+ quadratic, parabolic and hyperbolic curves as well as elliptical curves as possible fits.
+ The Direct method confines the fit to ellipses by ensuring that \f$ 4 A_{xx} A_{yy}- A_{xy}^2 > 0 \f$.
+ The condition imposed is that \f$ 4 A_{xx} A_{yy}- A_{xy}^2=1 \f$, which satisfies the inequality
+ and, since the coefficients can be arbitrarily scaled, is not overly restrictive.
+
+ \f{equation*}{
+ \epsilon ^2= A^T D^T D A \quad \text{with} \quad A^T C A =1 \quad \text{and} \quad C=\left(\begin{matrix}
+ 0 & 0  & 2  & 0  & 0  &  0  \\
+ 0 & -1  & 0  & 0  & 0  &  0 \\
+ 2 & 0  & 0  & 0  & 0  &  0 \\
+ 0 & 0  & 0  & 0  & 0  &  0 \\
+ 0 & 0  & 0  & 0  & 0  &  0 \\
+ 0 & 0  & 0  & 0  & 0  &  0
+ \end{matrix} \right)
+ \f}
+
+ The minimum cost is found by solving the generalized eigenvalue problem.
+
+ \f{equation*}{
+ D^T D A = \lambda C A
+ \f}
+
+ The system produces only one positive eigenvalue \f$ \lambda \f$, which is chosen as the solution;
+ its eigenvector \f$\mathbf{u}\f$ is then used to find the coefficients
+
+ \f{equation*}{
+ A = \sqrt{\frac{1}{\mathbf{u}^T C \mathbf{u}}}  \mathbf{u}
+ \f}
+ The scaling factor guarantees that  \f$A^T C A =1\f$.
+
+ @param points Input 2D point set, stored in std::vector\<\> or Mat
+ */
+CV_EXPORTS_W RotatedRect fitEllipseDirect( InputArray points );
+
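
A minimal usage sketch of the three ellipse fitters declared above, on synthetic points; the sample geometry is illustrative:

#include <opencv2/imgproc.hpp>
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    using namespace cv;
    // Points on an ellipse centred at (50,40) with axes 30x20, no rotation.
    std::vector<Point2f> pts;
    for (int i = 0; i < 12; ++i)
    {
        double t = i * CV_PI / 6.0;
        pts.push_back(Point2f(50.f + 30.f * (float)std::cos(t),
                              40.f + 20.f * (float)std::sin(t)));
    }
    RotatedRect r1 = fitEllipse(pts);        // algebraic fit
    RotatedRect r2 = fitEllipseAMS(pts);     // Taubin AMS fit
    RotatedRect r3 = fitEllipseDirect(pts);  // Fitzgibbon direct fit
    std::printf("fitEllipse %.1fx%.1f  AMS %.1fx%.1f  Direct %.1fx%.1f\n",
                r1.size.width, r1.size.height,
                r2.size.width, r2.size.height,
                r3.size.width, r3.size.height);
    return 0;
}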
 /** @brief Fits a line to a 2D or 3D point set.
 
 The function fitLine fits a line to a 2D or 3D point set by minimizing \f$\sum_i \rho(r_i)\f$ where
index 7811193..d3186d6 100644 (file)
@@ -120,3 +120,31 @@ PERF_TEST_P(MatInfo_Size_Scale_Area, ResizeArea,
     //difference equal to 1 is allowed because of different possible rounding modes: round-to-nearest vs bankers' rounding
     SANITY_CHECK(dst, 1);
 }
+
+typedef MatInfo_Size_Scale_Area MatInfo_Size_Scale_NN;
+
+PERF_TEST_P(MatInfo_Size_Scale_NN, ResizeNN,
+    testing::Combine(
+        testing::Values(CV_8UC1, CV_8UC2, CV_8UC4),
+        testing::Values(szVGA, szqHD, sz720p, sz1080p, sz2160p),
+        testing::Values(2.4, 3.4, 1.3)
+    )
+)
+{
+    int matType = get<0>(GetParam());
+    Size from = get<1>(GetParam());
+    double scale = get<2>(GetParam());
+
+    cv::Mat src(from, matType);
+
+    Size to(cvRound(from.width * scale), cvRound(from.height * scale));
+    cv::Mat dst(to, matType);
+
+    declare.in(src, WARMUP_RNG).out(dst);
+    declare.time(100);
+
+    TEST_CYCLE() resize(src, dst, dst.size(), 0, 0, INTER_NEAREST);
+
+    EXPECT_GT(countNonZero(dst.reshape(1)), 0);
+    SANITY_CHECK_NOTHING();
+}
index a171824..de9b93c 100644 (file)
@@ -275,7 +275,7 @@ struct VxKeypointsComparator
 
 static bool openvx_harris(Mat image, OutputArray _corners,
                           int _maxCorners, double _qualityLevel, double _minDistance,
-                          int _blockSize, int gradiantSize, double _harrisK)
+                          int _blockSize, int _gradientSize, double _harrisK)
 {
     using namespace ivx;
 
@@ -295,7 +295,7 @@ static bool openvx_harris(Mat image, OutputArray _corners,
         ivx::Scalar strengthThresh = ivx::Scalar::create<VX_TYPE_FLOAT32>(context, 0);
 
         //The gradient window size to use on the input.
-        vx_int32 gradientSize = 3;
+        vx_int32 gradientSize = _gradientSize;
 
         //The block window size used to compute the harris corner score
         vx_int32 blockSize = _blockSize;
@@ -379,7 +379,7 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners,
     // Disabled due to bad accuracy
     CV_OVX_RUN(false && useHarrisDetector && _mask.empty() &&
                !ovx::skipSmallImages<VX_KERNEL_HARRIS_CORNERS>(image.cols, image.rows),
-               openvx_harris(image, _corners, maxCorners, qualityLevel, minDistance, blockSize, gradiantSize, harrisK))
+               openvx_harris(image, _corners, maxCorners, qualityLevel, minDistance, blockSize, gradientSize, harrisK))
 
     if( useHarrisDetector )
         cornerHarris( image, eig, blockSize, gradientSize, harrisK );
index 665e9a3..d8c0284 100644 (file)
@@ -1112,8 +1112,8 @@ icvHoughCirclesGradient( CvMat* img, float dp, float min_dist,
         {
             int base = y*(acols+2) + x;
             if( adata[base] > acc_threshold &&
-                adata[base] > adata[base-1] && adata[base] > adata[base+1] &&
-                adata[base] > adata[base-acols-2] && adata[base] > adata[base+acols+2] )
+                adata[base] > adata[base-1] && adata[base] >= adata[base+1] &&
+                adata[base] > adata[base-acols-2] && adata[base] >= adata[base+acols+2] )
                 cvSeqPush(centers, &base);
         }
     }
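
The relaxed comparisons above (strict > on one side, >= on the other) matter when neighbouring accumulator cells tie: strict > on both sides rejects every cell of a plateau, while the mixed form keeps exactly one candidate per plateau. A toy 1D illustration:

#include <cstdio>

int main()
{
    // A flat-topped peak: the two 5s tie, as jittered Hough votes can.
    const int acc[7] = { 0, 3, 5, 5, 3, 0, 0 };
    for (int i = 1; i < 6; ++i)
    {
        bool strictBoth = acc[i] > acc[i - 1] && acc[i] >  acc[i + 1]; // misses 5,5
        bool mixed      = acc[i] > acc[i - 1] && acc[i] >= acc[i + 1]; // keeps one
        std::printf("i=%d strict=%d mixed=%d\n", i, (int)strictBoth, (int)mixed);
    }
    return 0;
}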
index 9661c26..303c759 100644 (file)
@@ -39,7 +39,6 @@
 //
 //M*/
 #include "precomp.hpp"
-
 namespace cv
 {
 
@@ -454,6 +453,329 @@ cv::RotatedRect cv::fitEllipse( InputArray _points )
     return box;
 }
 
+cv::RotatedRect cv::fitEllipseAMS( InputArray _points )
+{
+    Mat points = _points.getMat();
+    int i, n = points.checkVector(2);
+    int depth = points.depth();
+    CV_Assert( n >= 0 && (depth == CV_32F || depth == CV_32S));
+
+    RotatedRect box;
+
+    if( n < 5 )
+        CV_Error( CV_StsBadSize, "There should be at least 5 points to fit the ellipse" );
+
+    Point2f c(0,0);
+
+    bool is_float = depth == CV_32F;
+    const Point* ptsi = points.ptr<Point>();
+    const Point2f* ptsf = points.ptr<Point2f>();
+
+    Mat A( n, 6, CV_64F);
+    Matx<double, 6, 6> DM;
+    Matx<double, 5, 5> M;
+    Matx<double, 5, 1> pVec;
+    Matx<double, 6, 1> coeffs;
+
+    double x0, y0, a, b, theta;
+
+    for( i = 0; i < n; i++ )
+    {
+        Point2f p = is_float ? ptsf[i] : Point2f((float)ptsi[i].x, (float)ptsi[i].y);
+        c += p;
+    }
+    c.x /= (float)n;
+    c.y /= (float)n;
+
+    for( i = 0; i < n; i++ )
+    {
+        Point2f p = is_float ? ptsf[i] : Point2f((float)ptsi[i].x, (float)ptsi[i].y);
+        p -= c;
+
+        A.at<double>(i,0) = (double)(p.x)*(p.x);
+        A.at<double>(i,1) = (double)(p.x)*(p.y);
+        A.at<double>(i,2) = (double)(p.y)*(p.y);
+        A.at<double>(i,3) = (double)p.x;
+        A.at<double>(i,4) = (double)p.y;
+        A.at<double>(i,5) = 1.0;
+    }
+    cv::mulTransposed( A, DM, true, noArray(), 1.0, -1 );
+    DM *= (1.0/n);
+    double dnm = ( DM(2,5)*(DM(0,5) + DM(2,5)) - (DM(1,5)*DM(1,5)) );
+    double ddm =  (4.*(DM(0,5) + DM(2,5))*( (DM(0,5)*DM(2,5)) - (DM(1,5)*DM(1,5))));
+    double ddmm = (2.*(DM(0,5) + DM(2,5))*( (DM(0,5)*DM(2,5)) - (DM(1,5)*DM(1,5))));
+
+    M(0,0)=((-DM(0,0) + DM(0,2) + DM(0,5)*DM(0,5))*(DM(1,5)*DM(1,5)) + (-2*DM(0,1)*DM(1,5) + DM(0,5)*(DM(0,0) \
+            - (DM(0,5)*DM(0,5)) + (DM(1,5)*DM(1,5))))*DM(2,5) + (DM(0,0) - (DM(0,5)*DM(0,5)))*(DM(2,5)*DM(2,5))) / ddm;
+    M(0,1)=((DM(1,5)*DM(1,5))*(-DM(0,1) + DM(1,2) + DM(0,5)*DM(1,5)) + (DM(0,1)*DM(0,5) - ((DM(0,5)*DM(0,5)) + 2*DM(1,1))*DM(1,5) + \
+            (DM(1,5)*DM(1,5)*DM(1,5)))*DM(2,5) + (DM(0,1) - DM(0,5)*DM(1,5))*(DM(2,5)*DM(2,5))) / ddm;
+    M(0,2)=(-2*DM(1,2)*DM(1,5)*DM(2,5) - DM(0,5)*(DM(2,5)*DM(2,5))*(DM(0,5) + DM(2,5)) + DM(0,2)*dnm + \
+            (DM(1,5)*DM(1,5))*(DM(2,2) + DM(2,5)*(DM(0,5) + DM(2,5))))/ddm;
+    M(0,3)=(DM(1,5)*(DM(1,5)*DM(2,3) - 2*DM(1,3)*DM(2,5)) + DM(0,3)*dnm) / ddm;
+    M(0,4)=(DM(1,5)*(DM(1,5)*DM(2,4) - 2*DM(1,4)*DM(2,5)) + DM(0,4)*dnm) / ddm;
+    M(1,0)=(-(DM(0,2)*DM(0,5)*DM(1,5)) + (2*DM(0,1)*DM(0,5) - DM(0,0)*DM(1,5))*DM(2,5))/ddmm;
+    M(1,1)=(-(DM(0,1)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,2)*DM(1,5)) + 2*DM(1,1)*DM(2,5)))/ddmm;
+    M(1,2)=(-(DM(0,2)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,5)*DM(2,2)) + 2*DM(1,2)*DM(2,5)))/ddmm;
+    M(1,3)=(-(DM(0,3)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,5)*DM(2,3)) + 2*DM(1,3)*DM(2,5)))/ddmm;
+    M(1,4)=(-(DM(0,4)*DM(1,5)*DM(2,5)) + DM(0,5)*(-(DM(1,5)*DM(2,4)) + 2*DM(1,4)*DM(2,5)))/ddmm;
+    M(2,0)=(-2*DM(0,1)*DM(0,5)*DM(1,5) + (DM(0,0) + (DM(0,5)*DM(0,5)))*(DM(1,5)*DM(1,5)) + DM(0,5)*(-(DM(0,5)*DM(0,5)) \
+            + (DM(1,5)*DM(1,5)))*DM(2,5) - (DM(0,5)*DM(0,5))*(DM(2,5)*DM(2,5)) + DM(0,2)*(-(DM(1,5)*DM(1,5)) + DM(0,5)*(DM(0,5) + DM(2,5)))) / ddm;
+    M(2,1)=((DM(0,5)*DM(0,5))*(DM(1,2) - DM(1,5)*DM(2,5)) + (DM(1,5)*DM(1,5))*(DM(0,1) - DM(1,2) + DM(1,5)*DM(2,5)) \
+            + DM(0,5)*(DM(1,2)*DM(2,5) + DM(1,5)*(-2*DM(1,1) + (DM(1,5)*DM(1,5)) - (DM(2,5)*DM(2,5))))) / ddm;
+    M(2,2)=((DM(0,5)*DM(0,5))*(DM(2,2) - (DM(2,5)*DM(2,5))) + (DM(1,5)*DM(1,5))*(DM(0,2) - DM(2,2) + (DM(2,5)*DM(2,5))) + \
+             DM(0,5)*(-2*DM(1,2)*DM(1,5) + DM(2,5)*((DM(1,5)*DM(1,5)) + DM(2,2) - (DM(2,5)*DM(2,5))))) / ddm;
+    M(2,3)=((DM(1,5)*DM(1,5))*(DM(0,3) - DM(2,3)) + (DM(0,5)*DM(0,5))*DM(2,3) + DM(0,5)*(-2*DM(1,3)*DM(1,5) + DM(2,3)*DM(2,5))) / ddm;
+    M(2,4)=((DM(1,5)*DM(1,5))*(DM(0,4) - DM(2,4)) + (DM(0,5)*DM(0,5))*DM(2,4) + DM(0,5)*(-2*DM(1,4)*DM(1,5) + DM(2,4)*DM(2,5))) / ddm;
+    M(3,0)=DM(0,3);
+    M(3,1)=DM(1,3);
+    M(3,2)=DM(2,3);
+    M(3,3)=DM(3,3);
+    M(3,4)=DM(3,4);
+    M(4,0)=DM(0,4);
+    M(4,1)=DM(1,4);
+    M(4,2)=DM(2,4);
+    M(4,3)=DM(3,4);
+    M(4,4)=DM(4,4);
+
+    if (fabs(cv::determinant(M)) > 1.0e-10) {
+            Mat eVal, eVec;
+            eigenNonSymmetric(M, eVal, eVec);
+
+            // Select the eigen vector {a,b,c,d,e} which has the lowest eigenvalue
+            int minpos = 0;
+            double normi, normEVali, normMinpos, normEValMinpos;
+            normMinpos = sqrt(eVec.at<double>(minpos,0)*eVec.at<double>(minpos,0) + eVec.at<double>(minpos,1)*eVec.at<double>(minpos,1) + \
+                              eVec.at<double>(minpos,2)*eVec.at<double>(minpos,2) + eVec.at<double>(minpos,3)*eVec.at<double>(minpos,3) + \
+                              eVec.at<double>(minpos,4)*eVec.at<double>(minpos,4) );
+            normEValMinpos = eVal.at<double>(minpos,0) * normMinpos;
+            for (i=1; i<5; i++) {
+                normi = sqrt(eVec.at<double>(i,0)*eVec.at<double>(i,0) + eVec.at<double>(i,1)*eVec.at<double>(i,1) + \
+                             eVec.at<double>(i,2)*eVec.at<double>(i,2) + eVec.at<double>(i,3)*eVec.at<double>(i,3) + \
+                             eVec.at<double>(i,4)*eVec.at<double>(i,4) );
+                normEVali = eVal.at<double>(i,0) * normi;
+                if (normEVali < normEValMinpos) {
+                    minpos = i;
+                    normMinpos=normi;
+                    normEValMinpos=normEVali;
+                }
+            };
+
+            pVec(0) =eVec.at<double>(minpos,0) / normMinpos;
+            pVec(1) =eVec.at<double>(minpos,1) / normMinpos;
+            pVec(2) =eVec.at<double>(minpos,2) / normMinpos;
+            pVec(3) =eVec.at<double>(minpos,3) / normMinpos;
+            pVec(4) =eVec.at<double>(minpos,4) / normMinpos;
+
+            coeffs(0) =pVec(0) ;
+            coeffs(1) =pVec(1) ;
+            coeffs(2) =pVec(2) ;
+            coeffs(3) =pVec(3) ;
+            coeffs(4) =pVec(4) ;
+            coeffs(5) =-pVec(0) *DM(0,5)-pVec(1) *DM(1,5)-coeffs(2) *DM(2,5);
+
+        // Check that an elliptical solution has been found. AMS sometimes produces parabolic solutions.
+        bool is_ellipse = (coeffs(0)  < 0 && \
+                           coeffs(2)  < (coeffs(1) *coeffs(1) )/(4.*coeffs(0) ) && \
+                           coeffs(5)  > (-(coeffs(2) *(coeffs(3) *coeffs(3) )) + coeffs(1) *coeffs(3) *coeffs(4)  - coeffs(0) *(coeffs(4) *coeffs(4) )) / \
+                                        ((coeffs(1) *coeffs(1) ) - 4*coeffs(0) *coeffs(2) )) || \
+                          (coeffs(0)  > 0 && \
+                           coeffs(2)  > (coeffs(1) *coeffs(1) )/(4.*coeffs(0) ) && \
+                           coeffs(5)  < (-(coeffs(2) *(coeffs(3) *coeffs(3) )) + coeffs(1) *coeffs(3) *coeffs(4)  - coeffs(0) *(coeffs(4) *coeffs(4) )) / \
+                                        ( (coeffs(1) *coeffs(1) ) - 4*coeffs(0) *coeffs(2) ));
+        if (is_ellipse) {
+            double u1 = pVec(2) *pVec(3) *pVec(3)  - pVec(1) *pVec(3) *pVec(4)  + pVec(0) *pVec(4) *pVec(4)  + pVec(1) *pVec(1) *coeffs(5) ;
+            double u2 = pVec(0) *pVec(2) *coeffs(5) ;
+            double l1 = sqrt(pVec(1) *pVec(1)  + (pVec(0)  - pVec(2) )*(pVec(0)  - pVec(2) ));
+            double l2 = pVec(0)  + pVec(2) ;
+            double l3 = pVec(1) *pVec(1)  - 4.0*pVec(0) *pVec(2) ;
+            double p1 = 2.0*pVec(2) *pVec(3)  - pVec(1) *pVec(4) ;
+            double p2 = 2.0*pVec(0) *pVec(4) -(pVec(1) *pVec(3) );
+
+            x0 = p1/l3 + c.x;
+            y0 = p2/l3 + c.y;
+            a = std::sqrt(2.)*sqrt((u1 - 4.0*u2)/((l1 - l2)*l3));
+            b = std::sqrt(2.)*sqrt(-1.0*((u1 - 4.0*u2)/((l1 + l2)*l3)));
+            if (pVec(1)  == 0) {
+                if (pVec(0)  < pVec(2) ) {
+                    theta = 0;
+                } else {
+                    theta = CV_PI/2.;
+                }
+            } else {
+                theta = CV_PI/2. + 0.5*std::atan2(pVec(1) , (pVec(0)  - pVec(2) ));
+            }
+
+            box.center.x = (float)x0; // c.x was already added above
+            box.center.y = (float)y0; // c.y was already added above
+            box.size.width = (float)(2.0*a);
+            box.size.height = (float)(2.0*b);
+            if( box.size.width > box.size.height )
+            {
+                float tmp;
+                CV_SWAP( box.size.width, box.size.height, tmp );
+                box.angle = (float)(90 + theta*180/CV_PI);
+            } else {
+                box.angle = (float)(fmod(theta*180/CV_PI,180.0));
+            };
+
+
+        } else {
+            box = cv::fitEllipseDirect( points );
+        }
+    } else {
+        box = cv::fitEllipse( points );
+    }
+
+    return box;
+}
+
+cv::RotatedRect cv::fitEllipseDirect( InputArray _points )
+{
+    Mat points = _points.getMat();
+    int i, n = points.checkVector(2);
+    int depth = points.depth();
+    CV_Assert( n >= 0 && (depth == CV_32F || depth == CV_32S));
+
+    RotatedRect box;
+
+    if( n < 5 )
+        CV_Error( CV_StsBadSize, "There should be at least 5 points to fit the ellipse" );
+
+    Point2f c(0,0);
+
+    bool is_float = (depth == CV_32F);
+    const Point*   ptsi = points.ptr<Point>();
+    const Point2f* ptsf = points.ptr<Point2f>();
+
+    Mat A( n, 6, CV_64F);
+    Matx<double, 6, 6> DM;
+    Matx33d M, TM, Q;
+    Matx<double, 3, 1> pVec;
+
+    double x0, y0, a, b, theta, Ts;
+
+    for( i = 0; i < n; i++ )
+    {
+        Point2f p = is_float ? ptsf[i] : Point2f((float)ptsi[i].x, (float)ptsi[i].y);
+        c += p;
+    }
+    c.x /= (float)n;
+    c.y /= (float)n;
+
+    for( i = 0; i < n; i++ )
+    {
+        Point2f p = is_float ? ptsf[i] : Point2f((float)ptsi[i].x, (float)ptsi[i].y);
+        p -= c;
+
+        A.at<double>(i,0) = (double)(p.x)*(p.x);
+        A.at<double>(i,1) = (double)(p.x)*(p.y);
+        A.at<double>(i,2) = (double)(p.y)*(p.y);
+        A.at<double>(i,3) = (double)p.x;
+        A.at<double>(i,4) = (double)p.y;
+        A.at<double>(i,5) = 1.0;
+    }
+    cv::mulTransposed( A, DM, true, noArray(), 1.0, -1 );
+    DM *= (1.0/n);
+
+    TM(0,0) = DM(0,5)*DM(3,5)*DM(4,4) - DM(0,5)*DM(3,4)*DM(4,5) - DM(0,4)*DM(3,5)*DM(5,4) + \
+              DM(0,3)*DM(4,5)*DM(5,4) + DM(0,4)*DM(3,4)*DM(5,5) - DM(0,3)*DM(4,4)*DM(5,5);
+    TM(0,1) = DM(1,5)*DM(3,5)*DM(4,4) - DM(1,5)*DM(3,4)*DM(4,5) - DM(1,4)*DM(3,5)*DM(5,4) + \
+              DM(1,3)*DM(4,5)*DM(5,4) + DM(1,4)*DM(3,4)*DM(5,5) - DM(1,3)*DM(4,4)*DM(5,5);
+    TM(0,2) = DM(2,5)*DM(3,5)*DM(4,4) - DM(2,5)*DM(3,4)*DM(4,5) - DM(2,4)*DM(3,5)*DM(5,4) + \
+              DM(2,3)*DM(4,5)*DM(5,4) + DM(2,4)*DM(3,4)*DM(5,5) - DM(2,3)*DM(4,4)*DM(5,5);
+    TM(1,0) = DM(0,5)*DM(3,3)*DM(4,5) - DM(0,5)*DM(3,5)*DM(4,3) + DM(0,4)*DM(3,5)*DM(5,3) - \
+              DM(0,3)*DM(4,5)*DM(5,3) - DM(0,4)*DM(3,3)*DM(5,5) + DM(0,3)*DM(4,3)*DM(5,5);
+    TM(1,1) = DM(1,5)*DM(3,3)*DM(4,5) - DM(1,5)*DM(3,5)*DM(4,3) + DM(1,4)*DM(3,5)*DM(5,3) - \
+              DM(1,3)*DM(4,5)*DM(5,3) - DM(1,4)*DM(3,3)*DM(5,5) + DM(1,3)*DM(4,3)*DM(5,5);
+    TM(1,2) = DM(2,5)*DM(3,3)*DM(4,5) - DM(2,5)*DM(3,5)*DM(4,3) + DM(2,4)*DM(3,5)*DM(5,3) - \
+              DM(2,3)*DM(4,5)*DM(5,3) - DM(2,4)*DM(3,3)*DM(5,5) + DM(2,3)*DM(4,3)*DM(5,5);
+    TM(2,0) = DM(0,5)*DM(3,4)*DM(4,3) - DM(0,5)*DM(3,3)*DM(4,4) - DM(0,4)*DM(3,4)*DM(5,3) + \
+              DM(0,3)*DM(4,4)*DM(5,3) + DM(0,4)*DM(3,3)*DM(5,4) - DM(0,3)*DM(4,3)*DM(5,4);
+    TM(2,1) = DM(1,5)*DM(3,4)*DM(4,3) - DM(1,5)*DM(3,3)*DM(4,4) - DM(1,4)*DM(3,4)*DM(5,3) + \
+              DM(1,3)*DM(4,4)*DM(5,3) + DM(1,4)*DM(3,3)*DM(5,4) - DM(1,3)*DM(4,3)*DM(5,4);
+    TM(2,2) = DM(2,5)*DM(3,4)*DM(4,3) - DM(2,5)*DM(3,3)*DM(4,4) - DM(2,4)*DM(3,4)*DM(5,3) + \
+              DM(2,3)*DM(4,4)*DM(5,3) + DM(2,4)*DM(3,3)*DM(5,4) - DM(2,3)*DM(4,3)*DM(5,4);
+
+    Ts=(-(DM(3,5)*DM(4,4)*DM(5,3)) + DM(3,4)*DM(4,5)*DM(5,3) + DM(3,5)*DM(4,3)*DM(5,4) - \
+          DM(3,3)*DM(4,5)*DM(5,4)  - DM(3,4)*DM(4,3)*DM(5,5) + DM(3,3)*DM(4,4)*DM(5,5));
+
+    M(0,0) = (DM(2,0) + (DM(2,3)*TM(0,0) + DM(2,4)*TM(1,0) + DM(2,5)*TM(2,0))/Ts)/2.;
+    M(0,1) = (DM(2,1) + (DM(2,3)*TM(0,1) + DM(2,4)*TM(1,1) + DM(2,5)*TM(2,1))/Ts)/2.;
+    M(0,2) = (DM(2,2) + (DM(2,3)*TM(0,2) + DM(2,4)*TM(1,2) + DM(2,5)*TM(2,2))/Ts)/2.;
+    M(1,0) = -DM(1,0) - (DM(1,3)*TM(0,0) + DM(1,4)*TM(1,0) + DM(1,5)*TM(2,0))/Ts;
+    M(1,1) = -DM(1,1) - (DM(1,3)*TM(0,1) + DM(1,4)*TM(1,1) + DM(1,5)*TM(2,1))/Ts;
+    M(1,2) = -DM(1,2) - (DM(1,3)*TM(0,2) + DM(1,4)*TM(1,2) + DM(1,5)*TM(2,2))/Ts;
+    M(2,0) = (DM(0,0) + (DM(0,3)*TM(0,0) + DM(0,4)*TM(1,0) + DM(0,5)*TM(2,0))/Ts)/2.;
+    M(2,1) = (DM(0,1) + (DM(0,3)*TM(0,1) + DM(0,4)*TM(1,1) + DM(0,5)*TM(2,1))/Ts)/2.;
+    M(2,2) = (DM(0,2) + (DM(0,3)*TM(0,2) + DM(0,4)*TM(1,2) + DM(0,5)*TM(2,2))/Ts)/2.;
+
+    if (fabs(cv::determinant(M)) > 1.0e-10) {
+        Mat eVal, eVec;
+        eigenNonSymmetric(M, eVal, eVec);
+
+        // Select the eigen vector {a,b,c} which satisfies 4ac-b^2 > 0
+        double cond[3];
+        cond[0]=(4.0 * eVec.at<double>(0,0) * eVec.at<double>(0,2) - eVec.at<double>(0,1) * eVec.at<double>(0,1));
+        cond[1]=(4.0 * eVec.at<double>(1,0) * eVec.at<double>(1,2) - eVec.at<double>(1,1) * eVec.at<double>(1,1));
+        cond[2]=(4.0 * eVec.at<double>(2,0) * eVec.at<double>(2,2) - eVec.at<double>(2,1) * eVec.at<double>(2,1));
+        if (cond[0]<cond[1]) {
+            i = (cond[1]<cond[2]) ? 2 : 1;
+        } else {
+            i = (cond[0]<cond[2]) ? 2 : 0;
+        }
+        double norm = std::sqrt(eVec.at<double>(i,0)*eVec.at<double>(i,0) + eVec.at<double>(i,1)*eVec.at<double>(i,1) + eVec.at<double>(i,2)*eVec.at<double>(i,2));
+        if (((eVec.at<double>(i,0)<0.0  ? -1 : 1) * (eVec.at<double>(i,1)<0.0  ? -1 : 1) * (eVec.at<double>(i,2)<0.0  ? -1 : 1)) <= 0.0) {
+                norm=-1.0*norm;
+            }
+        pVec(0) =eVec.at<double>(i,0)/norm; pVec(1) =eVec.at<double>(i,1)/norm;pVec(2) =eVec.at<double>(i,2)/norm;
+
+    //  Q = (TM . pVec)/Ts;
+        Q(0,0) = (TM(0,0)*pVec(0) +TM(0,1)*pVec(1) +TM(0,2)*pVec(2) )/Ts;
+        Q(0,1) = (TM(1,0)*pVec(0) +TM(1,1)*pVec(1) +TM(1,2)*pVec(2) )/Ts;
+        Q(0,2) = (TM(2,0)*pVec(0) +TM(2,1)*pVec(1) +TM(2,2)*pVec(2) )/Ts;
+
+    // We compute the ellipse properties in the shifted coordinates as doing so improves the numerical accuracy.
+
+        double u1 = pVec(2)*Q(0,0)*Q(0,0) - pVec(1)*Q(0,0)*Q(0,1) + pVec(0)*Q(0,1)*Q(0,1) + pVec(1)*pVec(1)*Q(0,2);
+        double u2 = pVec(0)*pVec(2)*Q(0,2);
+        double l1 = sqrt(pVec(1)*pVec(1) + (pVec(0) - pVec(2))*(pVec(0) - pVec(2)));
+        double l2 = pVec(0) + pVec(2) ;
+        double l3 = pVec(1)*pVec(1) - 4*pVec(0)*pVec(2) ;
+        double p1 = 2*pVec(2)*Q(0,0) - pVec(1)*Q(0,1);
+        double p2 = 2*pVec(0)*Q(0,1) - pVec(1)*Q(0,0);
+
+        x0 = p1/l3 + c.x;
+        y0 = p2/l3 + c.y;
+        a = sqrt(2.)*sqrt((u1 - 4.0*u2)/((l1 - l2)*l3));
+        b = sqrt(2.)*sqrt(-1.0*((u1 - 4.0*u2)/((l1 + l2)*l3)));
+        if (pVec(1)  == 0) {
+            if (pVec(0)  < pVec(2) ) {
+                theta = 0;
+            } else {
+                theta = CV_PI/2.;
+            }
+        } else {
+                theta = CV_PI/2. + 0.5*std::atan2(pVec(1) , (pVec(0)  - pVec(2) ));
+        }
+
+        box.center.x = (float)x0;
+        box.center.y = (float)y0;
+        box.size.width = (float)(2.0*a);
+        box.size.height = (float)(2.0*b);
+        if( box.size.width > box.size.height )
+        {
+            float tmp;
+            CV_SWAP( box.size.width, box.size.height, tmp );
+            box.angle = (float)(fmod((90 + theta*180/CV_PI),180.0)) ;
+        } else {
+            box.angle = (float)(fmod(theta*180/CV_PI,180.0));
+        };
+    } else {
+        box = cv::fitEllipse( points );
+    }
+    return box;
+}
+
 
 namespace cv
 {
@@ -1080,5 +1402,4 @@ cvBoundingRect( CvArr* array, int update )
     return rect;
 }
 
-
 /* End of file. */
index 889ce2e..a9a9f86 100644 (file)
@@ -299,11 +299,9 @@ struct ColumnSum<int, uchar> :
         bool haveScale = scale != 1;
         double _scale = scale;
 
-        #if CV_SSE2
-            bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
-        #elif CV_NEON
-            bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
-        #endif
+#if CV_SIMD128
+        bool haveSIMD128 = hasSIMD128();
+#endif
 
         if( width != (int)sum.size() )
         {
@@ -319,23 +317,15 @@ struct ColumnSum<int, uchar> :
             {
                 const int* Sp = (const int*)src[0];
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    for( ; i <= width-4; i+=4 )
+                    for (; i <= width - 4; i += 4)
                     {
-                        __m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
-                        __m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
-                        _mm_storeu_si128((__m128i*)(SUM+i),_mm_add_epi32(_sum, _sp));
+                        v_store(SUM + i, v_load(SUM + i) + v_load(Sp + i));
                     }
                 }
-                #elif CV_NEON
-                if(haveNEON)
-                {
-                    for( ; i <= width - 4; i+=4 )
-                        vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
-                }
-                #endif
+#endif
                 for( ; i < width; i++ )
                     SUM[i] += Sp[i];
             }
@@ -354,51 +344,27 @@ struct ColumnSum<int, uchar> :
             if( haveScale )
             {
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    const __m128 scale4 = _mm_set1_ps((float)_scale);
-                    for( ; i <= width-8; i+=8 )
-                    {
-                        __m128i _sm  = _mm_loadu_si128((const __m128i*)(Sm+i));
-                        __m128i _sm1  = _mm_loadu_si128((const __m128i*)(Sm+i+4));
-
-                        __m128i _s0  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                     _mm_loadu_si128((const __m128i*)(Sp+i)));
-                        __m128i _s01  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i+4)),
-                                                      _mm_loadu_si128((const __m128i*)(Sp+i+4)));
-
-                        __m128i _s0T = _mm_cvtps_epi32(_mm_mul_ps(scale4, _mm_cvtepi32_ps(_s0)));
-                        __m128i _s0T1 = _mm_cvtps_epi32(_mm_mul_ps(scale4, _mm_cvtepi32_ps(_s01)));
-
-                        _s0T = _mm_packs_epi32(_s0T, _s0T1);
 
-                        _mm_storel_epi64((__m128i*)(D+i), _mm_packus_epi16(_s0T, _s0T));
-
-                        _mm_storeu_si128((__m128i*)(SUM+i), _mm_sub_epi32(_s0,_sm));
-                        _mm_storeu_si128((__m128i*)(SUM+i+4),_mm_sub_epi32(_s01,_sm1));
-                    }
-                }
-                #elif CV_NEON
-                if(haveNEON)
-                {
-                    float32x4_t v_scale = vdupq_n_f32((float)_scale);
+                    v_float32x4 v_scale = v_setall_f32((float)_scale);
                     for( ; i <= width-8; i+=8 )
                     {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
-                        int32x4_t v_s01 = vaddq_s32(vld1q_s32(SUM + i + 4), vld1q_s32(Sp + i + 4));
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
+                        v_int32x4 v_s01 = v_load(SUM + i + 4) + v_load(Sp + i + 4);
 
-                        uint32x4_t v_s0d = cv_vrndq_u32_f32(vmulq_f32(vcvtq_f32_s32(v_s0), v_scale));
-                        uint32x4_t v_s01d = cv_vrndq_u32_f32(vmulq_f32(vcvtq_f32_s32(v_s01), v_scale));
+                        v_uint32x4 v_s0d = v_reinterpret_as_u32(v_round(v_cvt_f32(v_s0) * v_scale));
+                        v_uint32x4 v_s01d = v_reinterpret_as_u32(v_round(v_cvt_f32(v_s01) * v_scale));
 
-                        uint16x8_t v_dst = vcombine_u16(vqmovn_u32(v_s0d), vqmovn_u32(v_s01d));
-                        vst1_u8(D + i, vqmovn_u16(v_dst));
+                        v_uint16x8 v_dst = v_pack(v_s0d, v_s01d);
+                        v_pack_store(D + i, v_dst);
 
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
-                        vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
+                        v_store(SUM + i + 4, v_s01 - v_load(Sm + i + 4));
                     }
                 }
-                #endif
+#endif
                 for( ; i < width; i++ )
                 {
                     int s0 = SUM[i] + Sp[i];
@@ -409,43 +375,22 @@ struct ColumnSum<int, uchar> :
             else
             {
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
                     for( ; i <= width-8; i+=8 )
                     {
-                        __m128i _sm  = _mm_loadu_si128((const __m128i*)(Sm+i));
-                        __m128i _sm1  = _mm_loadu_si128((const __m128i*)(Sm+i+4));
-
-                        __m128i _s0  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                     _mm_loadu_si128((const __m128i*)(Sp+i)));
-                        __m128i _s01  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i+4)),
-                                                      _mm_loadu_si128((const __m128i*)(Sp+i+4)));
-
-                        __m128i _s0T = _mm_packs_epi32(_s0, _s01);
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
+                        v_int32x4 v_s01 = v_load(SUM + i + 4) + v_load(Sp + i + 4);
 
-                        _mm_storel_epi64((__m128i*)(D+i), _mm_packus_epi16(_s0T, _s0T));
+                        v_uint16x8 v_dst = v_pack(v_reinterpret_as_u32(v_s0), v_reinterpret_as_u32(v_s01));
+                        v_pack_store(D + i, v_dst);
 
-                        _mm_storeu_si128((__m128i*)(SUM+i), _mm_sub_epi32(_s0,_sm));
-                        _mm_storeu_si128((__m128i*)(SUM+i+4),_mm_sub_epi32(_s01,_sm1));
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
+                        v_store(SUM + i + 4, v_s01 - v_load(Sm + i + 4));
                     }
                 }
-                #elif CV_NEON
-                if(haveNEON)
-                {
-                    for( ; i <= width-8; i+=8 )
-                    {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
-                        int32x4_t v_s01 = vaddq_s32(vld1q_s32(SUM + i + 4), vld1q_s32(Sp + i + 4));
-
-                        uint16x8_t v_dst = vcombine_u16(vqmovun_s32(v_s0), vqmovun_s32(v_s01));
-                        vst1_u8(D + i, vqmovn_u16(v_dst));
-
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
-                        vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
-                    }
-                }
-                #endif
+#endif
 
                 for( ; i < width; i++ )
                 {
@@ -502,10 +447,8 @@ public BaseColumnFilter
         ushort* SUM;
         const bool haveScale = scale != 1;
 
-#if CV_SSE2
-        bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
-#elif CV_NEON
-        bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
+#if CV_SIMD128
+        bool haveSIMD128 = hasSIMD128();
 #endif
 
         if( width != (int)sum.size() )
@@ -522,22 +465,14 @@ public BaseColumnFilter
             {
                 const ushort* Sp = (const ushort*)src[0];
                 int i = 0;
-#if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    for( ; i <= width-8; i+=8 )
+                    for( ; i <= width - 8; i += 8 )
                     {
-                        __m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
-                        __m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
-                        _mm_storeu_si128((__m128i*)(SUM+i),_mm_add_epi16(_sum, _sp));
+                        v_store(SUM + i, v_load(SUM + i) + v_load(Sp + i));
                     }
                 }
-#elif CV_NEON
-                if(haveNEON)
-                {
-                    for( ; i <= width - 8; i+=8 )
-                        vst1q_u16(SUM + i, vaddq_u16(vld1q_u16(SUM + i), vld1q_u16(Sp + i)));
-                }
 #endif
                 for( ; i < width; i++ )
                     SUM[i] += Sp[i];
@@ -641,11 +576,9 @@ struct ColumnSum<int, short> :
         bool haveScale = scale != 1;
         double _scale = scale;
 
-        #if CV_SSE2
-            bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
-        #elif CV_NEON
-            bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
-        #endif
+#if CV_SIMD128
+        bool haveSIMD128 = hasSIMD128();
+#endif
 
         if( width != (int)sum.size() )
         {
@@ -661,22 +594,14 @@ struct ColumnSum<int, short> :
             {
                 const int* Sp = (const int*)src[0];
                 i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    for( ; i <= width-4; i+=4 )
+                    for( ; i <= width - 4; i+=4 )
                     {
-                        __m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
-                        __m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
-                        _mm_storeu_si128((__m128i*)(SUM+i),_mm_add_epi32(_sum, _sp));
+                        v_store(SUM + i, v_load(SUM + i) + v_load(Sp + i));
                     }
                 }
-                #elif CV_NEON
-                if(haveNEON)
-                {
-                    for( ; i <= width - 4; i+=4 )
-                        vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
-                }
                 #endif
                 for( ; i < width; i++ )
                     SUM[i] += Sp[i];
@@ -696,47 +621,24 @@ struct ColumnSum<int, short> :
             if( haveScale )
             {
                 i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
-                {
-                    const __m128 scale4 = _mm_set1_ps((float)_scale);
-                    for( ; i <= width-8; i+=8 )
-                    {
-                        __m128i _sm   = _mm_loadu_si128((const __m128i*)(Sm+i));
-                        __m128i _sm1  = _mm_loadu_si128((const __m128i*)(Sm+i+4));
-
-                        __m128i _s0  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                     _mm_loadu_si128((const __m128i*)(Sp+i)));
-                        __m128i _s01  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i+4)),
-                                                      _mm_loadu_si128((const __m128i*)(Sp+i+4)));
-
-                        __m128i _s0T  = _mm_cvtps_epi32(_mm_mul_ps(scale4, _mm_cvtepi32_ps(_s0)));
-                        __m128i _s0T1 = _mm_cvtps_epi32(_mm_mul_ps(scale4, _mm_cvtepi32_ps(_s01)));
-
-                        _mm_storeu_si128((__m128i*)(D+i), _mm_packs_epi32(_s0T, _s0T1));
-
-                        _mm_storeu_si128((__m128i*)(SUM+i),_mm_sub_epi32(_s0,_sm));
-                        _mm_storeu_si128((__m128i*)(SUM+i+4), _mm_sub_epi32(_s01,_sm1));
-                    }
-                }
-                #elif CV_NEON
-                if(haveNEON)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    float32x4_t v_scale = vdupq_n_f32((float)_scale);
+                    v_float32x4 v_scale = v_setall_f32((float)_scale);
                     for( ; i <= width-8; i+=8 )
                     {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
-                        int32x4_t v_s01 = vaddq_s32(vld1q_s32(SUM + i + 4), vld1q_s32(Sp + i + 4));
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
+                        v_int32x4 v_s01 = v_load(SUM + i + 4) + v_load(Sp + i + 4);
 
-                        int32x4_t v_s0d = cv_vrndq_s32_f32(vmulq_f32(vcvtq_f32_s32(v_s0), v_scale));
-                        int32x4_t v_s01d = cv_vrndq_s32_f32(vmulq_f32(vcvtq_f32_s32(v_s01), v_scale));
-                        vst1q_s16(D + i, vcombine_s16(vqmovn_s32(v_s0d), vqmovn_s32(v_s01d)));
+                        v_int32x4 v_s0d =  v_round(v_cvt_f32(v_s0) * v_scale);
+                        v_int32x4 v_s01d = v_round(v_cvt_f32(v_s01) * v_scale);
+                        v_store(D + i, v_pack(v_s0d, v_s01d));
 
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
-                        vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
+                        v_store(SUM + i + 4, v_s01 - v_load(Sm + i + 4));
                     }
                 }
-                #endif
+#endif
                 for( ; i < width; i++ )
                 {
                     int s0 = SUM[i] + Sp[i];
@@ -747,41 +649,21 @@ struct ColumnSum<int, short> :
             else
             {
                 i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
                     for( ; i <= width-8; i+=8 )
                     {
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
+                        v_int32x4 v_s01 = v_load(SUM + i + 4) + v_load(Sp + i + 4);
 
-                        __m128i _sm  = _mm_loadu_si128((const __m128i*)(Sm+i));
-                        __m128i _sm1  = _mm_loadu_si128((const __m128i*)(Sm+i+4));
-
-                        __m128i _s0  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                     _mm_loadu_si128((const __m128i*)(Sp+i)));
-                        __m128i _s01  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i+4)),
-                                                      _mm_loadu_si128((const __m128i*)(Sp+i+4)));
+                        v_store(D + i, v_pack(v_s0, v_s01));
 
-                        _mm_storeu_si128((__m128i*)(D+i), _mm_packs_epi32(_s0, _s01));
-
-                        _mm_storeu_si128((__m128i*)(SUM+i), _mm_sub_epi32(_s0,_sm));
-                        _mm_storeu_si128((__m128i*)(SUM+i+4),_mm_sub_epi32(_s01,_sm1));
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
+                        v_store(SUM + i + 4, v_s01 - v_load(Sm + i + 4));
                     }
                 }
-                #elif CV_NEON
-                if(haveNEON)
-                {
-                    for( ; i <= width-8; i+=8 )
-                    {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
-                        int32x4_t v_s01 = vaddq_s32(vld1q_s32(SUM + i + 4), vld1q_s32(Sp + i + 4));
-
-                        vst1q_s16(D + i, vcombine_s16(vqmovn_s32(v_s0), vqmovn_s32(v_s01)));
-
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
-                        vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
-                    }
-                }
-                #endif
+#endif
 
                 for( ; i < width; i++ )
                 {
@@ -821,11 +703,9 @@ struct ColumnSum<int, ushort> :
         bool haveScale = scale != 1;
         double _scale = scale;
 
-        #if CV_SSE2
-            bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
-        #elif CV_NEON
-            bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
-        #endif
+#if CV_SIMD128
+        bool haveSIMD128 = hasSIMD128();
+#endif
 
         if( width != (int)sum.size() )
         {
@@ -841,23 +721,15 @@ struct ColumnSum<int, ushort> :
             {
                 const int* Sp = (const int*)src[0];
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    for( ; i <= width-4; i+=4 )
+                    for (; i <= width - 4; i += 4)
                     {
-                        __m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
-                        __m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
-                        _mm_storeu_si128((__m128i*)(SUM+i),_mm_add_epi32(_sum, _sp));
+                        v_store(SUM + i, v_load(SUM + i) + v_load(Sp + i));
                     }
                 }
-                #elif CV_NEON
-                if(haveNEON)
-                {
-                    for( ; i <= width - 4; i+=4 )
-                        vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
-                }
-                #endif
+#endif
                 for( ; i < width; i++ )
                     SUM[i] += Sp[i];
             }
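Every ColumnSum hunk in this patch follows the same window update: add the incoming row to the running column sums, emit the (optionally scaled) sums, then subtract the outgoing row. A minimal standalone sketch of the accumulation step with the universal intrinsics (the function name and signature are illustrative, not from the patch):

    #include "opencv2/core/hal/intrin.hpp"

    // Illustrative sketch: fold one incoming row into the running column
    // sums; the scalar tail handles the last width % 4 elements.
    static void addRow(int* SUM, const int* Sp, int width)
    {
        int i = 0;
    #if CV_SIMD128
        if (cv::hasSIMD128())
        {
            for (; i <= width - 4; i += 4)
                cv::v_store(SUM + i, cv::v_load(SUM + i) + cv::v_load(Sp + i));
        }
    #endif
        for (; i < width; i++)
            SUM[i] += Sp[i];
    }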
@@ -876,46 +748,24 @@ struct ColumnSum<int, ushort> :
             if( haveScale )
             {
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
-                {
-                    const __m128 scale4 = _mm_set1_ps((float)_scale);
-                    const __m128i delta0 = _mm_set1_epi32(0x8000);
-                    const __m128i delta1 = _mm_set1_epi32(0x80008000);
-
-                    for( ; i < width-4; i+=4)
-                    {
-                        __m128i _sm   = _mm_loadu_si128((const __m128i*)(Sm+i));
-                        __m128i _s0   = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                      _mm_loadu_si128((const __m128i*)(Sp+i)));
-
-                        __m128i _res = _mm_cvtps_epi32(_mm_mul_ps(scale4, _mm_cvtepi32_ps(_s0)));
-
-                        _res = _mm_sub_epi32(_res, delta0);
-                        _res = _mm_add_epi16(_mm_packs_epi32(_res, _res), delta1);
-
-                        _mm_storel_epi64((__m128i*)(D+i), _res);
-                        _mm_storeu_si128((__m128i*)(SUM+i), _mm_sub_epi32(_s0,_sm));
-                    }
-                }
-                #elif CV_NEON
-                if(haveNEON)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    float32x4_t v_scale = vdupq_n_f32((float)_scale);
+                    v_float32x4 v_scale = v_setall_f32((float)_scale);
                     for( ; i <= width-8; i+=8 )
                     {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
-                        int32x4_t v_s01 = vaddq_s32(vld1q_s32(SUM + i + 4), vld1q_s32(Sp + i + 4));
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
+                        v_int32x4 v_s01 = v_load(SUM + i + 4) + v_load(Sp + i + 4);
 
-                        uint32x4_t v_s0d = cv_vrndq_u32_f32(vmulq_f32(vcvtq_f32_s32(v_s0), v_scale));
-                        uint32x4_t v_s01d = cv_vrndq_u32_f32(vmulq_f32(vcvtq_f32_s32(v_s01), v_scale));
-                        vst1q_u16(D + i, vcombine_u16(vqmovn_u32(v_s0d), vqmovn_u32(v_s01d)));
+                        v_uint32x4 v_s0d = v_reinterpret_as_u32(v_round(v_cvt_f32(v_s0) * v_scale));
+                        v_uint32x4 v_s01d = v_reinterpret_as_u32(v_round(v_cvt_f32(v_s01) * v_scale));
+                        v_store(D + i, v_pack(v_s0d, v_s01d));
 
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
-                        vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
+                        v_store(SUM + i + 4, v_s01 - v_load(Sm + i + 4));
                     }
                 }
-                #endif
+#endif
                 for( ; i < width; i++ )
                 {
                     int s0 = SUM[i] + Sp[i];
@@ -926,41 +776,21 @@ struct ColumnSum<int, ushort> :
             else
             {
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
-                {
-                    const __m128i delta0 = _mm_set1_epi32(0x8000);
-                    const __m128i delta1 = _mm_set1_epi32(0x80008000);
-
-                    for( ; i < width-4; i+=4 )
-                    {
-                        __m128i _sm   = _mm_loadu_si128((const __m128i*)(Sm+i));
-                        __m128i _s0   = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                      _mm_loadu_si128((const __m128i*)(Sp+i)));
-
-                        __m128i _res = _mm_sub_epi32(_s0, delta0);
-                        _res = _mm_add_epi16(_mm_packs_epi32(_res, _res), delta1);
-
-                        _mm_storel_epi64((__m128i*)(D+i), _res);
-                        _mm_storeu_si128((__m128i*)(SUM+i), _mm_sub_epi32(_s0,_sm));
-                    }
-                }
-                #elif CV_NEON
-                if(haveNEON)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
                     for( ; i <= width-8; i+=8 )
                     {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
-                        int32x4_t v_s01 = vaddq_s32(vld1q_s32(SUM + i + 4), vld1q_s32(Sp + i + 4));
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
+                        v_int32x4 v_s01 = v_load(SUM + i + 4) + v_load(Sp + i + 4);
 
-                        vst1q_u16(D + i, vcombine_u16(vqmovun_s32(v_s0), vqmovun_s32(v_s01)));
+                        v_store(D + i, v_pack(v_reinterpret_as_u32(v_s0), v_reinterpret_as_u32(v_s01)));
 
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
-                        vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
+                        v_store(SUM + i + 4, v_s01 - v_load(Sm + i + 4));
                     }
                 }
-                #endif
-
+#endif
                 for( ; i < width; i++ )
                 {
                     int s0 = SUM[i] + Sp[i];
@@ -998,11 +828,9 @@ struct ColumnSum<int, int> :
         bool haveScale = scale != 1;
         double _scale = scale;
 
-        #if CV_SSE2
-            bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
-        #elif CV_NEON
-            bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
-        #endif
+#if CV_SIMD128
+        bool haveSIMD128 = hasSIMD128();
+#endif
 
         if( width != (int)sum.size() )
         {
@@ -1018,23 +846,15 @@ struct ColumnSum<int, int> :
             {
                 const int* Sp = (const int*)src[0];
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    for( ; i <= width-4; i+=4 )
+                    for( ; i <= width - 4; i+=4 )
                     {
-                        __m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
-                        __m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
-                        _mm_storeu_si128((__m128i*)(SUM+i),_mm_add_epi32(_sum, _sp));
+                        v_store(SUM + i, v_load(SUM + i) + v_load(Sp + i));
                     }
                 }
-                #elif CV_NEON
-                if(haveNEON)
-                {
-                    for( ; i <= width - 4; i+=4 )
-                        vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
-                }
-                #endif
+#endif
                 for( ; i < width; i++ )
                     SUM[i] += Sp[i];
             }
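In the scaled branches, one universal-intrinsics sequence now replaces both the SSE2 and NEON variants: convert the 32-bit sums to float, multiply by the normalization factor, round back, and for 16-bit destinations narrow with a saturating v_pack. A standalone sketch for a short destination (function name and arguments are illustrative):

    #include "opencv2/core/hal/intrin.hpp"

    // Illustrative sketch: scale-round eight 32-bit sums and narrow them
    // to eight shorts with signed saturation, as in ColumnSum<int, short>.
    static void scaleStoreShort(const int* sum, short* D, int i, float scale)
    {
        cv::v_float32x4 v_scale = cv::v_setall_f32(scale);
        cv::v_int32x4 lo = cv::v_round(cv::v_cvt_f32(cv::v_load(sum + i)) * v_scale);
        cv::v_int32x4 hi = cv::v_round(cv::v_cvt_f32(cv::v_load(sum + i + 4)) * v_scale);
        cv::v_store(D + i, cv::v_pack(lo, hi));
    }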
@@ -1053,38 +873,20 @@ struct ColumnSum<int, int> :
             if( haveScale )
             {
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    const __m128 scale4 = _mm_set1_ps((float)_scale);
+                    v_float32x4 v_scale = v_setall_f32((float)_scale);
                     for( ; i <= width-4; i+=4 )
                     {
-                        __m128i _sm   = _mm_loadu_si128((const __m128i*)(Sm+i));
-
-                        __m128i _s0  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                     _mm_loadu_si128((const __m128i*)(Sp+i)));
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
+                        v_int32x4 v_s0d = v_round(v_cvt_f32(v_s0) * v_scale);
 
-                        __m128i _s0T  = _mm_cvtps_epi32(_mm_mul_ps(scale4, _mm_cvtepi32_ps(_s0)));
-
-                        _mm_storeu_si128((__m128i*)(D+i), _s0T);
-                        _mm_storeu_si128((__m128i*)(SUM+i),_mm_sub_epi32(_s0,_sm));
+                        v_store(D + i, v_s0d);
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
                     }
                 }
-                #elif CV_NEON
-                if(haveNEON)
-                {
-                    float32x4_t v_scale = vdupq_n_f32((float)_scale);
-                    for( ; i <= width-4; i+=4 )
-                    {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
-
-                        int32x4_t v_s0d = cv_vrndq_s32_f32(vmulq_f32(vcvtq_f32_s32(v_s0), v_scale));
-                        vst1q_s32(D + i, v_s0d);
-
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
-                    }
-                }
-                #endif
+#endif
                 for( ; i < width; i++ )
                 {
                     int s0 = SUM[i] + Sp[i];
@@ -1095,32 +897,18 @@ struct ColumnSum<int, int> :
             else
             {
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
-                {
-                    for( ; i <= width-4; i+=4 )
-                    {
-                        __m128i _sm  = _mm_loadu_si128((const __m128i*)(Sm+i));
-                        __m128i _s0  = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                     _mm_loadu_si128((const __m128i*)(Sp+i)));
-
-                        _mm_storeu_si128((__m128i*)(D+i), _s0);
-                        _mm_storeu_si128((__m128i*)(SUM+i), _mm_sub_epi32(_s0,_sm));
-                    }
-                }
-                #elif CV_NEON
-                if(haveNEON)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
                     for( ; i <= width-4; i+=4 )
                     {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
 
-                        vst1q_s32(D + i, v_s0);
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
+                        v_store(D + i, v_s0);
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
                     }
                 }
-                #endif
-
+#endif
                 for( ; i < width; i++ )
                 {
                     int s0 = SUM[i] + Sp[i];
@@ -1159,11 +947,9 @@ struct ColumnSum<int, float> :
         bool haveScale = scale != 1;
         double _scale = scale;
 
-        #if CV_SSE2
-            bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
-        #elif CV_NEON
-            bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
-        #endif
+#if CV_SIMD128
+        bool haveSIMD128 = hasSIMD128();
+#endif
 
         if( width != (int)sum.size() )
         {
@@ -1179,23 +965,15 @@ struct ColumnSum<int, float> :
             {
                 const int* Sp = (const int*)src[0];
                 int i = 0;
-                #if CV_SSE2
-                if(haveSSE2)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    for( ; i <= width-4; i+=4 )
+                    for( ; i <= width - 4; i+=4 )
                     {
-                        __m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
-                        __m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
-                        _mm_storeu_si128((__m128i*)(SUM+i),_mm_add_epi32(_sum, _sp));
+                        v_store(SUM + i, v_load(SUM + i) + v_load(Sp + i));
                     }
                 }
-                #elif CV_NEON
-                if(haveNEON)
-                {
-                    for( ; i <= width - 4; i+=4 )
-                        vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
-                }
-                #endif
+#endif
 
                 for( ; i < width; i++ )
                     SUM[i] += Sp[i];
@@ -1216,39 +994,23 @@ struct ColumnSum<int, float> :
             {
                 int i = 0;
 
-                #if CV_SSE2
-                if(haveSSE2)
-                {
-                    const __m128 scale4 = _mm_set1_ps((float)_scale);
-
-                    for( ; i < width-4; i+=4)
-                    {
-                        __m128i _sm   = _mm_loadu_si128((const __m128i*)(Sm+i));
-                        __m128i _s0   = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                      _mm_loadu_si128((const __m128i*)(Sp+i)));
-
-                        _mm_storeu_ps(D+i, _mm_mul_ps(scale4, _mm_cvtepi32_ps(_s0)));
-                        _mm_storeu_si128((__m128i*)(SUM+i), _mm_sub_epi32(_s0,_sm));
-                    }
-                }
-                #elif CV_NEON
-                if(haveNEON)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
-                    float32x4_t v_scale = vdupq_n_f32((float)_scale);
-                    for( ; i <= width-8; i+=8 )
+                    v_float32x4 v_scale = v_setall_f32((float)_scale);
+                    for (; i <= width - 8; i += 8)
                     {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
-                        int32x4_t v_s01 = vaddq_s32(vld1q_s32(SUM + i + 4), vld1q_s32(Sp + i + 4));
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
+                        v_int32x4 v_s01 = v_load(SUM + i + 4) + v_load(Sp + i + 4);
 
-                        vst1q_f32(D + i, vmulq_f32(vcvtq_f32_s32(v_s0), v_scale));
-                        vst1q_f32(D + i + 4, vmulq_f32(vcvtq_f32_s32(v_s01), v_scale));
+                        v_store(D + i, v_cvt_f32(v_s0) * v_scale);
+                        v_store(D + i + 4, v_cvt_f32(v_s01) * v_scale);
 
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
-                        vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
+                        v_store(SUM + i + 4, v_s01 - v_load(Sm + i + 4));
                     }
                 }
-                #endif
-
+#endif
                 for( ; i < width; i++ )
                 {
                     int s0 = SUM[i] + Sp[i];
@@ -1260,36 +1022,22 @@ struct ColumnSum<int, float> :
             {
                 int i = 0;
 
-                #if CV_SSE2
-                if(haveSSE2)
-                {
-                    for( ; i < width-4; i+=4)
-                    {
-                        __m128i _sm   = _mm_loadu_si128((const __m128i*)(Sm+i));
-                        __m128i _s0   = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(SUM+i)),
-                                                      _mm_loadu_si128((const __m128i*)(Sp+i)));
-
-                        _mm_storeu_ps(D+i, _mm_cvtepi32_ps(_s0));
-                        _mm_storeu_si128((__m128i*)(SUM+i), _mm_sub_epi32(_s0,_sm));
-                    }
-                }
-                #elif CV_NEON
-                if(haveNEON)
+#if CV_SIMD128
+                if( haveSIMD128 )
                 {
                     for( ; i <= width-8; i+=8 )
                     {
-                        int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
-                        int32x4_t v_s01 = vaddq_s32(vld1q_s32(SUM + i + 4), vld1q_s32(Sp + i + 4));
+                        v_int32x4 v_s0 = v_load(SUM + i) + v_load(Sp + i);
+                        v_int32x4 v_s01 = v_load(SUM + i + 4) + v_load(Sp + i + 4);
 
-                        vst1q_f32(D + i, vcvtq_f32_s32(v_s0));
-                        vst1q_f32(D + i + 4, vcvtq_f32_s32(v_s01));
+                        v_store(D + i, v_cvt_f32(v_s0));
+                        v_store(D + i + 4, v_cvt_f32(v_s01));
 
-                        vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
-                        vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
+                        v_store(SUM + i, v_s0 - v_load(Sm + i));
+                        v_store(SUM + i + 4, v_s01 - v_load(Sm + i + 4));
                     }
                 }
-                #endif
-
+#endif
                 for( ; i < width; i++ )
                 {
                     int s0 = SUM[i] + Sp[i];
@@ -2395,46 +2143,20 @@ typedef struct
 } Histogram;
 
 
-#if CV_SSE2
-#define MEDIAN_HAVE_SIMD 1
+#if CV_SIMD128
 
 static inline void histogram_add_simd( const HT x[16], HT y[16] )
 {
-    const __m128i* rx = (const __m128i*)x;
-    __m128i* ry = (__m128i*)y;
-    __m128i r0 = _mm_add_epi16(_mm_load_si128(ry+0),_mm_load_si128(rx+0));
-    __m128i r1 = _mm_add_epi16(_mm_load_si128(ry+1),_mm_load_si128(rx+1));
-    _mm_store_si128(ry+0, r0);
-    _mm_store_si128(ry+1, r1);
+    v_store(y, v_load(x) + v_load(y));
+    v_store(y + 8, v_load(x + 8) + v_load(y + 8));
 }
 
 static inline void histogram_sub_simd( const HT x[16], HT y[16] )
 {
-    const __m128i* rx = (const __m128i*)x;
-    __m128i* ry = (__m128i*)y;
-    __m128i r0 = _mm_sub_epi16(_mm_load_si128(ry+0),_mm_load_si128(rx+0));
-    __m128i r1 = _mm_sub_epi16(_mm_load_si128(ry+1),_mm_load_si128(rx+1));
-    _mm_store_si128(ry+0, r0);
-    _mm_store_si128(ry+1, r1);
-}
-
-#elif CV_NEON
-#define MEDIAN_HAVE_SIMD 1
-
-static inline void histogram_add_simd( const HT x[16], HT y[16] )
-{
-    vst1q_u16(y, vaddq_u16(vld1q_u16(x), vld1q_u16(y)));
-    vst1q_u16(y + 8, vaddq_u16(vld1q_u16(x + 8), vld1q_u16(y + 8)));
+    v_store(y, v_load(y) - v_load(x));
+    v_store(y + 8, v_load(y + 8) - v_load(x + 8));
 }
 
-static inline void histogram_sub_simd( const HT x[16], HT y[16] )
-{
-    vst1q_u16(y, vsubq_u16(vld1q_u16(y), vld1q_u16(x)));
-    vst1q_u16(y + 8, vsubq_u16(vld1q_u16(y + 8), vld1q_u16(x + 8)));
-}
-
-#else
-#define MEDIAN_HAVE_SIMD 0
 #endif
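Since HT is a 16-bit counter, a 16-bin histogram occupies exactly two v_uint16x8 registers, so whole-histogram add and subtract each collapse to two vector operations on any 128-bit target. A standalone check of that layout (values are hypothetical; the real buffers are alignPtr'ed to 16 bytes, although v_load/v_store do not require alignment):

    #include "opencv2/core/hal/intrin.hpp"
    #include <cassert>

    typedef unsigned short HT;   // 16-bit bin counters, as in the patch

    int main()
    {
        CV_DECL_ALIGNED(16) HT x[16], y[16];
        for (int k = 0; k < 16; k++) { x[k] = (HT)k; y[k] = 100; }
        // Whole-histogram add: two v_uint16x8 operations, the same shape
        // as histogram_add_simd above.
        cv::v_store(y,     cv::v_load(y)     + cv::v_load(x));
        cv::v_store(y + 8, cv::v_load(y + 8) + cv::v_load(x + 8));
        for (int k = 0; k < 16; k++)
            assert(y[k] == 100 + k);
        return 0;
    }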
 
 
@@ -2486,8 +2208,8 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
     std::vector<HT> _h_fine(16 * 16 * (STRIPE_SIZE + 2*r) * cn + 16);
     HT* h_coarse = alignPtr(&_h_coarse[0], 16);
     HT* h_fine = alignPtr(&_h_fine[0], 16);
-#if MEDIAN_HAVE_SIMD
-    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2) || checkHardwareSupport(CV_CPU_NEON);
+#if CV_SIMD128
+    volatile bool useSIMD = hasSIMD128();
 #endif
 
     for( int x = 0; x < _dst.cols; x += STRIPE_SIZE )
@@ -2533,7 +2255,7 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
                 for( k = 0; k < 16; ++k )
                     histogram_muladd( 2*r+1, &h_fine[16*n*(16*c+k)], &H[c].fine[k][0] );
 
-            #if MEDIAN_HAVE_SIMD
+#if CV_SIMD128
                 if( useSIMD )
                 {
                     for( j = 0; j < 2*r; ++j )
@@ -2597,7 +2319,7 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
                     }
                 }
                 else
-            #endif
+#endif
                 {
                     for( j = 0; j < 2*r; ++j )
                         histogram_add( &h_coarse[16*(n*c+j)], H[c].coarse );
@@ -2871,20 +2593,20 @@ struct MinMax32f
     }
 };
 
-#if CV_SSE2
+#if CV_SIMD128
 
 struct MinMaxVec8u
 {
     typedef uchar value_type;
-    typedef __m128i arg_type;
+    typedef v_uint8x16 arg_type;
     enum { SIZE = 16 };
-    arg_type load(const uchar* ptr) { return _mm_loadu_si128((const __m128i*)ptr); }
-    void store(uchar* ptr, arg_type val) { _mm_storeu_si128((__m128i*)ptr, val); }
+    arg_type load(const uchar* ptr) { return v_load(ptr); }
+    void store(uchar* ptr, const arg_type &val) { v_store(ptr, val); }
     void operator()(arg_type& a, arg_type& b) const
     {
         arg_type t = a;
-        a = _mm_min_epu8(a, b);
-        b = _mm_max_epu8(b, t);
+        a = v_min(a, b);
+        b = v_max(b, t);
     }
 };
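All four MinMaxVec functors now share one lane-wise compare-exchange built on v_min/v_max; the sorting networks in medianBlur_SortNet are composed from exactly this primitive. A sketch of the smallest such network, using the MinMaxVec8u defined above (the helper name is illustrative):

    // Illustrative median-of-3 network: after op(a, b) each lane of a
    // holds min(a, b) and each lane of b holds max(a, b), so three
    // exchanges leave the lane-wise median in p1.
    static cv::v_uint8x16 median3(cv::v_uint8x16 p0, cv::v_uint8x16 p1,
                                  cv::v_uint8x16 p2)
    {
        MinMaxVec8u op;
        op(p0, p1);
        op(p1, p2);   // p2 now holds the lane-wise maximum
        op(p0, p1);   // p1 now holds the lane-wise median
        return p1;
    }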
 
@@ -2892,80 +2614,15 @@ struct MinMaxVec8u
 struct MinMaxVec16u
 {
     typedef ushort value_type;
-    typedef __m128i arg_type;
+    typedef v_uint16x8 arg_type;
     enum { SIZE = 8 };
-    arg_type load(const ushort* ptr) { return _mm_loadu_si128((const __m128i*)ptr); }
-    void store(ushort* ptr, arg_type val) { _mm_storeu_si128((__m128i*)ptr, val); }
-    void operator()(arg_type& a, arg_type& b) const
-    {
-        arg_type t = _mm_subs_epu16(a, b);
-        a = _mm_subs_epu16(a, t);
-        b = _mm_adds_epu16(b, t);
-    }
-};
-
-
-struct MinMaxVec16s
-{
-    typedef short value_type;
-    typedef __m128i arg_type;
-    enum { SIZE = 8 };
-    arg_type load(const short* ptr) { return _mm_loadu_si128((const __m128i*)ptr); }
-    void store(short* ptr, arg_type val) { _mm_storeu_si128((__m128i*)ptr, val); }
-    void operator()(arg_type& a, arg_type& b) const
-    {
-        arg_type t = a;
-        a = _mm_min_epi16(a, b);
-        b = _mm_max_epi16(b, t);
-    }
-};
-
-
-struct MinMaxVec32f
-{
-    typedef float value_type;
-    typedef __m128 arg_type;
-    enum { SIZE = 4 };
-    arg_type load(const float* ptr) { return _mm_loadu_ps(ptr); }
-    void store(float* ptr, arg_type val) { _mm_storeu_ps(ptr, val); }
+    arg_type load(const ushort* ptr) { return v_load(ptr); }
+    void store(ushort* ptr, const arg_type &val) { v_store(ptr, val); }
     void operator()(arg_type& a, arg_type& b) const
     {
         arg_type t = a;
-        a = _mm_min_ps(a, b);
-        b = _mm_max_ps(b, t);
-    }
-};
-
-#elif CV_NEON
-
-struct MinMaxVec8u
-{
-    typedef uchar value_type;
-    typedef uint8x16_t arg_type;
-    enum { SIZE = 16 };
-    arg_type load(const uchar* ptr) { return vld1q_u8(ptr); }
-    void store(uchar* ptr, arg_type val) { vst1q_u8(ptr, val); }
-    void operator()(arg_type& a, arg_type& b) const
-    {
-        arg_type t = a;
-        a = vminq_u8(a, b);
-        b = vmaxq_u8(b, t);
-    }
-};
-
-
-struct MinMaxVec16u
-{
-    typedef ushort value_type;
-    typedef uint16x8_t arg_type;
-    enum { SIZE = 8 };
-    arg_type load(const ushort* ptr) { return vld1q_u16(ptr); }
-    void store(ushort* ptr, arg_type val) { vst1q_u16(ptr, val); }
-    void operator()(arg_type& a, arg_type& b) const
-    {
-        arg_type t = a;
-        a = vminq_u16(a, b);
-        b = vmaxq_u16(b, t);
+        a = v_min(a, b);
+        b = v_max(b, t);
     }
 };
 
@@ -2973,15 +2630,15 @@ struct MinMaxVec16u
 struct MinMaxVec16s
 {
     typedef short value_type;
-    typedef int16x8_t arg_type;
+    typedef v_int16x8 arg_type;
     enum { SIZE = 8 };
-    arg_type load(const short* ptr) { return vld1q_s16(ptr); }
-    void store(short* ptr, arg_type val) { vst1q_s16(ptr, val); }
+    arg_type load(const short* ptr) { return v_load(ptr); }
+    void store(short* ptr, const arg_type &val) { v_store(ptr, val); }
     void operator()(arg_type& a, arg_type& b) const
     {
         arg_type t = a;
-        a = vminq_s16(a, b);
-        b = vmaxq_s16(b, t);
+        a = v_min(a, b);
+        b = v_max(b, t);
     }
 };
 
@@ -2989,19 +2646,18 @@ struct MinMaxVec16s
 struct MinMaxVec32f
 {
     typedef float value_type;
-    typedef float32x4_t arg_type;
+    typedef v_float32x4 arg_type;
     enum { SIZE = 4 };
-    arg_type load(const float* ptr) { return vld1q_f32(ptr); }
-    void store(float* ptr, arg_type val) { vst1q_f32(ptr, val); }
+    arg_type load(const float* ptr) { return v_load(ptr); }
+    void store(float* ptr, const arg_type &val) { v_store(ptr, val); }
     void operator()(arg_type& a, arg_type& b) const
     {
         arg_type t = a;
-        a = vminq_f32(a, b);
-        b = vmaxq_f32(b, t);
+        a = v_min(a, b);
+        b = v_max(b, t);
     }
 };
 
-
 #else
 
 typedef MinMax8u MinMaxVec8u;
@@ -3027,7 +2683,7 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m )
     int i, j, k, cn = _src.channels();
     Op op;
     VecOp vop;
-    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2) || checkHardwareSupport(CV_CPU_NEON);
+    volatile bool useSIMD = hasSIMD128();
 
     if( m == 3 )
     {
@@ -3478,7 +3134,7 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
 #endif
 
     bool useSortNet = ksize == 3 || (ksize == 5
-#if !(CV_SSE2 || CV_NEON)
+#if !(CV_SIMD128)
             && ( src0.depth() > CV_8U || src0.channels() == 2 || src0.channels() > 4 )
 #endif
         );
@@ -3513,7 +3169,7 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
 
         double img_size_mp = (double)(src0.total())/(1 << 20);
         if( ksize <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)*
-            (MEDIAN_HAVE_SIMD && (checkHardwareSupport(CV_CPU_SSE2) || checkHardwareSupport(CV_CPU_NEON)) ? 1 : 3))
+            (CV_SIMD128 && hasSIMD128() ? 1 : 3))
             medianBlur_8u_Om( src, dst, ksize );
         else
             medianBlur_8u_O1( src, dst, ksize );
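The dispatch above layers a compile-time gate over a runtime one: CV_SIMD128 means the universal intrinsics are compiled in for this target, while hasSIMD128() checks the CPU at runtime. Restated as a standalone sketch (the function name is illustrative), the heuristic switches to the O(1) histogram filter at roughly three times smaller kernel sizes when SIMD is available:

    #include "opencv2/core/hal/intrin.hpp"

    // Illustrative restatement of the ksize threshold above: for ksize up
    // to the returned value medianBlur_8u_Om is chosen, beyond it
    // medianBlur_8u_O1.
    static int om_threshold(double img_size_mp)
    {
        int base = img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2;
    #if CV_SIMD128
        if (cv::hasSIMD128())
            return 3 + base;      // vectorized O(1) path wins sooner
    #endif
        return 3 + base * 3;      // scalar: favor the O(ksize) variant longer
    }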
@@ -3542,12 +3198,10 @@ public:
     {
         int i, j, cn = dest->channels(), k;
         Size size = dest->size();
-        #if CV_SSE3
+#if CV_SIMD128
         int CV_DECL_ALIGNED(16) buf[4];
-        float CV_DECL_ALIGNED(16) bufSum[4];
-        static const unsigned int CV_DECL_ALIGNED(16) bufSignMask[] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
-        bool haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
-        #endif
+        bool haveSIMD128 = hasSIMD128();
+#endif
 
         for( i = range.start; i < range.end; i++ )
         {
@@ -3561,35 +3215,40 @@ public:
                     float sum = 0, wsum = 0;
                     int val0 = sptr[j];
                     k = 0;
-                    #if CV_SSE3
-                    if( haveSSE3 )
+#if CV_SIMD128
+                    if( haveSIMD128 )
                     {
-                        __m128 _val0 = _mm_set1_ps(static_cast<float>(val0));
-                        const __m128 _signMask = _mm_load_ps((const float*)bufSignMask);
+                        v_float32x4 _val0 = v_setall_f32(static_cast<float>(val0));
+                        v_float32x4 vsumw = v_setzero_f32();
+                        v_float32x4 vsumc = v_setzero_f32();
 
                         for( ; k <= maxk - 4; k += 4 )
                         {
-                            __m128 _valF = _mm_set_ps(sptr[j + space_ofs[k+3]], sptr[j + space_ofs[k+2]],
-                                                      sptr[j + space_ofs[k+1]], sptr[j + space_ofs[k]]);
-
-                            __m128 _val = _mm_andnot_ps(_signMask, _mm_sub_ps(_valF, _val0));
-                            _mm_store_si128((__m128i*)buf, _mm_cvtps_epi32(_val));
-
-                            __m128 _cw = _mm_set_ps(color_weight[buf[3]],color_weight[buf[2]],
-                                                    color_weight[buf[1]],color_weight[buf[0]]);
-                            __m128 _sw = _mm_loadu_ps(space_weight+k);
-                            __m128 _w = _mm_mul_ps(_cw, _sw);
-                             _cw = _mm_mul_ps(_w, _valF);
-
-                             _sw = _mm_hadd_ps(_w, _cw);
-                             _sw = _mm_hadd_ps(_sw, _sw);
-                             _mm_storel_pi((__m64*)bufSum, _sw);
-
-                             sum += bufSum[1];
-                             wsum += bufSum[0];
+                            v_float32x4 _valF = v_float32x4(sptr[j + space_ofs[k]],
+                                sptr[j + space_ofs[k + 1]],
+                                sptr[j + space_ofs[k + 2]],
+                                sptr[j + space_ofs[k + 3]]);
+                            v_float32x4 _val = v_abs(_valF - _val0);
+                            v_store(buf, v_round(_val));
+
+                            v_float32x4 _cw = v_float32x4(color_weight[buf[0]],
+                                color_weight[buf[1]],
+                                color_weight[buf[2]],
+                                color_weight[buf[3]]);
+                            v_float32x4 _sw = v_load(space_weight+k);
+                            v_float32x4 _w = _cw * _sw;
+                            _cw = _w * _valF;
+
+                            vsumw += _w;
+                            vsumc += _cw;
                         }
+                        float *bufFloat = (float*)buf;
+                        v_float32x4 sum4 = v_reduce_sum4(vsumw, vsumc, vsumw, vsumc);
+                        v_store(bufFloat, sum4);
+                        sum += bufFloat[1];
+                        wsum += bufFloat[0];
                     }
-                    #endif
+#endif
                     for( ; k < maxk; k++ )
                     {
                         int val = sptr[j + space_ofs[k]];
@@ -3609,58 +3268,62 @@ public:
                     float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
                     int b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
                     k = 0;
-                    #if CV_SSE3
-                    if( haveSSE3 )
+#if CV_SIMD128
+                    if( haveSIMD128 )
                     {
-                        const __m128i izero = _mm_setzero_si128();
-                        const __m128 _b0 = _mm_set1_ps(static_cast<float>(b0));
-                        const __m128 _g0 = _mm_set1_ps(static_cast<float>(g0));
-                        const __m128 _r0 = _mm_set1_ps(static_cast<float>(r0));
-                        const __m128 _signMask = _mm_load_ps((const float*)bufSignMask);
+                        v_float32x4 vsumw = v_setzero_f32();
+                        v_float32x4 vsumb = v_setzero_f32();
+                        v_float32x4 vsumg = v_setzero_f32();
+                        v_float32x4 vsumr = v_setzero_f32();
+                        const v_float32x4 _b0 = v_setall_f32(static_cast<float>(b0));
+                        const v_float32x4 _g0 = v_setall_f32(static_cast<float>(g0));
+                        const v_float32x4 _r0 = v_setall_f32(static_cast<float>(r0));
 
                         for( ; k <= maxk - 4; k += 4 )
                         {
-                            const int* const sptr_k0  = reinterpret_cast<const int*>(sptr + j + space_ofs[k]);
-                            const int* const sptr_k1  = reinterpret_cast<const int*>(sptr + j + space_ofs[k+1]);
-                            const int* const sptr_k2  = reinterpret_cast<const int*>(sptr + j + space_ofs[k+2]);
-                            const int* const sptr_k3  = reinterpret_cast<const int*>(sptr + j + space_ofs[k+3]);
-
-                            __m128 _b = _mm_cvtepi32_ps(_mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(sptr_k0[0]), izero), izero));
-                            __m128 _g = _mm_cvtepi32_ps(_mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(sptr_k1[0]), izero), izero));
-                            __m128 _r = _mm_cvtepi32_ps(_mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(sptr_k2[0]), izero), izero));
-                            __m128 _z = _mm_cvtepi32_ps(_mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(sptr_k3[0]), izero), izero));
-
-                            _MM_TRANSPOSE4_PS(_b, _g, _r, _z);
-
-                            __m128 bt = _mm_andnot_ps(_signMask, _mm_sub_ps(_b,_b0));
-                            __m128 gt = _mm_andnot_ps(_signMask, _mm_sub_ps(_g,_g0));
-                            __m128 rt = _mm_andnot_ps(_signMask, _mm_sub_ps(_r,_r0));
-
-                            bt =_mm_add_ps(rt, _mm_add_ps(bt, gt));
-                            _mm_store_si128((__m128i*)buf, _mm_cvtps_epi32(bt));
-
-                            __m128 _w  = _mm_set_ps(color_weight[buf[3]],color_weight[buf[2]],
-                                                    color_weight[buf[1]],color_weight[buf[0]]);
-                            __m128 _sw = _mm_loadu_ps(space_weight+k);
-
-                            _w = _mm_mul_ps(_w,_sw);
-                            _b = _mm_mul_ps(_b, _w);
-                            _g = _mm_mul_ps(_g, _w);
-                            _r = _mm_mul_ps(_r, _w);
-
-                            _w = _mm_hadd_ps(_w, _b);
-                            _g = _mm_hadd_ps(_g, _r);
-
-                            _w = _mm_hadd_ps(_w, _g);
-                            _mm_store_ps(bufSum, _w);
-
-                            wsum  += bufSum[0];
-                            sum_b += bufSum[1];
-                            sum_g += bufSum[2];
-                            sum_r += bufSum[3];
-                         }
+                            const uchar* const sptr_k0  = sptr + j + space_ofs[k];
+                            const uchar* const sptr_k1  = sptr + j + space_ofs[k+1];
+                            const uchar* const sptr_k2  = sptr + j + space_ofs[k+2];
+                            const uchar* const sptr_k3  = sptr + j + space_ofs[k+3];
+
+                            v_float32x4 __b = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(sptr_k0)));
+                            v_float32x4 __g = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(sptr_k1)));
+                            v_float32x4 __r = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(sptr_k2)));
+                            v_float32x4 __z = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(sptr_k3)));
+                            v_float32x4 _b, _g, _r, _z;
+
+                            v_transpose4x4(__b, __g, __r, __z, _b, _g, _r, _z);
+
+                            v_float32x4 bt = v_abs(_b -_b0);
+                            v_float32x4 gt = v_abs(_g -_g0);
+                            v_float32x4 rt = v_abs(_r -_r0);
+
+                            bt = rt + bt + gt;
+                            v_store(buf, v_round(bt));
+
+                            v_float32x4 _w  = v_float32x4(color_weight[buf[0]],color_weight[buf[1]],
+                                                    color_weight[buf[2]],color_weight[buf[3]]);
+                            v_float32x4 _sw = v_load(space_weight+k);
+
+                            _w *= _sw;
+                            _b *=  _w;
+                            _g *=  _w;
+                            _r *=  _w;
+
+                            vsumw += _w;
+                            vsumb += _b;
+                            vsumg += _g;
+                            vsumr += _r;
+                        }
+                        float *bufFloat = (float*)buf;
+                        v_float32x4 sum4 = v_reduce_sum4(vsumw, vsumb, vsumg, vsumr);
+                        v_store(bufFloat, sum4);
+                        wsum += bufFloat[0];
+                        sum_b += bufFloat[1];
+                        sum_g += bufFloat[2];
+                        sum_r += bufFloat[3];
                     }
-                    #endif
+#endif
 
                     for( ; k < maxk; k++ )
                     {
@@ -3859,16 +3522,10 @@ public:
     {
         int i, j, k;
         Size size = dest->size();
-        #if CV_SSE3 || CV_NEON
+#if CV_SIMD128
         int CV_DECL_ALIGNED(16) idxBuf[4];
-        float CV_DECL_ALIGNED(16) bufSum32[4];
-        static const unsigned int CV_DECL_ALIGNED(16) bufSignMask[] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
-        #endif
-        #if CV_SSE3
-        bool haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
-        #elif CV_NEON
-        bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
-        #endif
+        bool haveSIMD128 = hasSIMD128();
+#endif
 
         for( i = range.start; i < range.end; i++ )
         {
@@ -3882,84 +3539,49 @@ public:
                     float sum = 0, wsum = 0;
                     float val0 = sptr[j];
                     k = 0;
-                    #if CV_SSE3
-                    if( haveSSE3 )
-                    {
-                        __m128 psum = _mm_setzero_ps();
-                        const __m128 _val0 = _mm_set1_ps(sptr[j]);
-                        const __m128 _scale_index = _mm_set1_ps(scale_index);
-                        const __m128 _signMask = _mm_load_ps((const float*)bufSignMask);
-
-                        for( ; k <= maxk - 4 ; k += 4 )
-                        {
-                            __m128 _sw    = _mm_loadu_ps(space_weight + k);
-                            __m128 _val   = _mm_set_ps(sptr[j + space_ofs[k+3]], sptr[j + space_ofs[k+2]],
-                                                       sptr[j + space_ofs[k+1]], sptr[j + space_ofs[k]]);
-                            __m128 _alpha = _mm_mul_ps(_mm_andnot_ps( _signMask, _mm_sub_ps(_val,_val0)), _scale_index);
-
-                            __m128i _idx = _mm_cvtps_epi32(_alpha);
-                            _mm_store_si128((__m128i*)idxBuf, _idx);
-                            _alpha = _mm_sub_ps(_alpha, _mm_cvtepi32_ps(_idx));
-
-                            __m128 _explut  = _mm_set_ps(expLUT[idxBuf[3]], expLUT[idxBuf[2]],
-                                                         expLUT[idxBuf[1]], expLUT[idxBuf[0]]);
-                            __m128 _explut1 = _mm_set_ps(expLUT[idxBuf[3]+1], expLUT[idxBuf[2]+1],
-                                                         expLUT[idxBuf[1]+1], expLUT[idxBuf[0]+1]);
-
-                            __m128 _w = _mm_mul_ps(_sw, _mm_add_ps(_explut, _mm_mul_ps(_alpha, _mm_sub_ps(_explut1, _explut))));
-                            _val = _mm_mul_ps(_w, _val);
-
-                            _sw = _mm_hadd_ps(_w, _val);
-                            _sw = _mm_hadd_ps(_sw, _sw);
-                            psum = _mm_add_ps(_sw, psum);
-                        }
-                        _mm_storel_pi((__m64*)bufSum32, psum);
-
-                        sum = bufSum32[1];
-                        wsum = bufSum32[0];
-                    }
-                    #elif CV_NEON
-                    if( haveNEON )
+#if CV_SIMD128
+                    if( haveSIMD128 )
                     {
-                        float32x2_t psum = vdup_n_f32(0.0f);
-                        const volatile float32x4_t _val0 = vdupq_n_f32(sptr[j]);
-                        const float32x4_t _scale_index = vdupq_n_f32(scale_index);
-                        const uint32x4_t _signMask = vld1q_u32(bufSignMask);
+                        v_float32x4 vecwsum = v_setzero_f32();
+                        v_float32x4 vecvsum = v_setzero_f32();
+                        const v_float32x4 _val0 = v_setall_f32(sptr[j]);
+                        const v_float32x4 _scale_index = v_setall_f32(scale_index);
 
-                        for( ; k <= maxk - 4 ; k += 4 )
+                        for (; k <= maxk - 4; k += 4)
                         {
-                            float32x4_t _sw  = vld1q_f32(space_weight + k);
-                            float CV_DECL_ALIGNED(16) _data[] = {sptr[j + space_ofs[k]],   sptr[j + space_ofs[k+1]],
-                                                                 sptr[j + space_ofs[k+2]], sptr[j + space_ofs[k+3]],};
-                            float32x4_t _val = vld1q_f32(_data);
-                            float32x4_t _alpha = vsubq_f32(_val, _val0);
-                            _alpha = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(_alpha), _signMask));
-                            _alpha = vmulq_f32(_alpha, _scale_index);
-                            int32x4_t _idx = vcvtq_s32_f32(_alpha);
-                            vst1q_s32(idxBuf, _idx);
-                            _alpha = vsubq_f32(_alpha, vcvtq_f32_s32(_idx));
-
-                            bufSum32[0] = expLUT[idxBuf[0]];
-                            bufSum32[1] = expLUT[idxBuf[1]];
-                            bufSum32[2] = expLUT[idxBuf[2]];
-                            bufSum32[3] = expLUT[idxBuf[3]];
-                            float32x4_t _explut = vld1q_f32(bufSum32);
-                            bufSum32[0] = expLUT[idxBuf[0]+1];
-                            bufSum32[1] = expLUT[idxBuf[1]+1];
-                            bufSum32[2] = expLUT[idxBuf[2]+1];
-                            bufSum32[3] = expLUT[idxBuf[3]+1];
-                            float32x4_t _explut1 = vld1q_f32(bufSum32);
-
-                            float32x4_t _w = vmulq_f32(_sw, vaddq_f32(_explut, vmulq_f32(_alpha, vsubq_f32(_explut1, _explut))));
-                            _val = vmulq_f32(_w, _val);
-
-                            float32x2_t _wval = vpadd_f32(vpadd_f32(vget_low_f32(_w),vget_high_f32(_w)), vpadd_f32(vget_low_f32(_val), vget_high_f32(_val)));
-                            psum = vadd_f32(_wval, psum);
+                            v_float32x4 _sw = v_load(space_weight + k);
+                            v_float32x4 _val = v_float32x4(sptr[j + space_ofs[k]],
+                                sptr[j + space_ofs[k + 1]],
+                                sptr[j + space_ofs[k + 2]],
+                                sptr[j + space_ofs[k + 3]]);
+                            v_float32x4 _alpha = v_abs(_val - _val0) * _scale_index;
+
+                            v_int32x4 _idx = v_round(_alpha);
+                            v_store(idxBuf, _idx);
+                            _alpha -= v_cvt_f32(_idx);
+
+                            v_float32x4 _explut = v_float32x4(expLUT[idxBuf[0]],
+                                expLUT[idxBuf[1]],
+                                expLUT[idxBuf[2]],
+                                expLUT[idxBuf[3]]);
+                            v_float32x4 _explut1 = v_float32x4(expLUT[idxBuf[0] + 1],
+                                expLUT[idxBuf[1] + 1],
+                                expLUT[idxBuf[2] + 1],
+                                expLUT[idxBuf[3] + 1]);
+
+                            v_float32x4 _w = _sw * (_explut + (_alpha * (_explut1 - _explut)));
+                            _val *= _w;
+
+                            vecwsum += _w;
+                            vecvsum += _val;
                         }
-                        sum = vget_lane_f32(psum, 1);
-                        wsum = vget_lane_f32(psum, 0);
+                        float *bufFloat = (float*)idxBuf;
+                        v_float32x4 sum4 = v_reduce_sum4(vecwsum, vecvsum, vecwsum, vecvsum);
+                        v_store(bufFloat, sum4);
+                        sum += bufFloat[1];
+                        wsum += bufFloat[0];
                     }
-                    #endif
+#endif
 
                     for( ; k < maxk; k++ )
                     {
@@ -3982,129 +3604,70 @@ public:
                     float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
                     float b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
                     k = 0;
-                    #if  CV_SSE3
-                    if( haveSSE3 )
-                    {
-                        __m128 sum = _mm_setzero_ps();
-                        const __m128 _b0 = _mm_set1_ps(b0);
-                        const __m128 _g0 = _mm_set1_ps(g0);
-                        const __m128 _r0 = _mm_set1_ps(r0);
-                        const __m128 _scale_index = _mm_set1_ps(scale_index);
-                        const __m128 _signMask = _mm_load_ps((const float*)bufSignMask);
-
-                        for( ; k <= maxk-4; k += 4 )
-                        {
-                            __m128 _sw = _mm_loadu_ps(space_weight + k);
-
-                            const float* const sptr_k0 = sptr + j + space_ofs[k];
-                            const float* const sptr_k1 = sptr + j + space_ofs[k+1];
-                            const float* const sptr_k2 = sptr + j + space_ofs[k+2];
-                            const float* const sptr_k3 = sptr + j + space_ofs[k+3];
-
-                            __m128 _b = _mm_loadu_ps(sptr_k0);
-                            __m128 _g = _mm_loadu_ps(sptr_k1);
-                            __m128 _r = _mm_loadu_ps(sptr_k2);
-                            __m128 _z = _mm_loadu_ps(sptr_k3);
-                            _MM_TRANSPOSE4_PS(_b, _g, _r, _z);
-
-                            __m128 _bt = _mm_andnot_ps(_signMask,_mm_sub_ps(_b,_b0));
-                            __m128 _gt = _mm_andnot_ps(_signMask,_mm_sub_ps(_g,_g0));
-                            __m128 _rt = _mm_andnot_ps(_signMask,_mm_sub_ps(_r,_r0));
-
-                            __m128 _alpha = _mm_mul_ps(_scale_index, _mm_add_ps(_rt,_mm_add_ps(_bt, _gt)));
-
-                            __m128i _idx  = _mm_cvtps_epi32(_alpha);
-                            _mm_store_si128((__m128i*)idxBuf, _idx);
-                            _alpha = _mm_sub_ps(_alpha, _mm_cvtepi32_ps(_idx));
-
-                            __m128 _explut  = _mm_set_ps(expLUT[idxBuf[3]], expLUT[idxBuf[2]], expLUT[idxBuf[1]], expLUT[idxBuf[0]]);
-                            __m128 _explut1 = _mm_set_ps(expLUT[idxBuf[3]+1], expLUT[idxBuf[2]+1], expLUT[idxBuf[1]+1], expLUT[idxBuf[0]+1]);
-
-                            __m128 _w = _mm_mul_ps(_sw, _mm_add_ps(_explut, _mm_mul_ps(_alpha, _mm_sub_ps(_explut1, _explut))));
-
-                            _b = _mm_mul_ps(_b, _w);
-                            _g = _mm_mul_ps(_g, _w);
-                            _r = _mm_mul_ps(_r, _w);
-
-                             _w = _mm_hadd_ps(_w, _b);
-                             _g = _mm_hadd_ps(_g, _r);
-
-                             _w = _mm_hadd_ps(_w, _g);
-                             sum = _mm_add_ps(sum, _w);
-                        }
-                        _mm_store_ps(bufSum32, sum);
-                        wsum  = bufSum32[0];
-                        sum_b = bufSum32[1];
-                        sum_g = bufSum32[2];
-                        sum_r = bufSum32[3];
-                    }
-                    #elif CV_NEON
-                    if( haveNEON )
+#if CV_SIMD128
+                    if( haveSIMD128 )
                     {
-                        float32x4_t sum = vdupq_n_f32(0.0f);
-                        const float32x4_t _b0 = vdupq_n_f32(b0);
-                        const float32x4_t _g0 = vdupq_n_f32(g0);
-                        const float32x4_t _r0 = vdupq_n_f32(r0);
-                        const float32x4_t _scale_index = vdupq_n_f32(scale_index);
-                        const uint32x4_t _signMask = vld1q_u32(bufSignMask);
+                        v_float32x4 sumw = v_setzero_f32();
+                        v_float32x4 sumb = v_setzero_f32();
+                        v_float32x4 sumg = v_setzero_f32();
+                        v_float32x4 sumr = v_setzero_f32();
+                        const v_float32x4 _b0 = v_setall_f32(b0);
+                        const v_float32x4 _g0 = v_setall_f32(g0);
+                        const v_float32x4 _r0 = v_setall_f32(r0);
+                        const v_float32x4 _scale_index = v_setall_f32(scale_index);
 
                         for( ; k <= maxk-4; k += 4 )
                         {
-                            float32x4_t _sw = vld1q_f32(space_weight + k);
+                            v_float32x4 _sw = v_load(space_weight + k);
 
                             const float* const sptr_k0 = sptr + j + space_ofs[k];
                             const float* const sptr_k1 = sptr + j + space_ofs[k+1];
                             const float* const sptr_k2 = sptr + j + space_ofs[k+2];
                             const float* const sptr_k3 = sptr + j + space_ofs[k+3];
 
-                            float32x4_t _v0 = vld1q_f32(sptr_k0);
-                            float32x4_t _v1 = vld1q_f32(sptr_k1);
-                            float32x4_t _v2 = vld1q_f32(sptr_k2);
-                            float32x4_t _v3 = vld1q_f32(sptr_k3);
-
-                            float32x4x2_t v01 = vtrnq_f32(_v0, _v1);
-                            float32x4x2_t v23 = vtrnq_f32(_v2, _v3);
-                            float32x4_t _b = vcombine_f32(vget_low_f32(v01.val[0]), vget_low_f32(v23.val[0]));
-                            float32x4_t _g = vcombine_f32(vget_low_f32(v01.val[1]), vget_low_f32(v23.val[1]));
-                            float32x4_t _r = vcombine_f32(vget_high_f32(v01.val[0]), vget_high_f32(v23.val[0]));
-
-                            float32x4_t _bt = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vsubq_f32(_b, _b0)), _signMask));
-                            float32x4_t _gt = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vsubq_f32(_g, _g0)), _signMask));
-                            float32x4_t _rt = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vsubq_f32(_r, _r0)), _signMask));
-                            float32x4_t _alpha = vmulq_f32(_scale_index, vaddq_f32(_bt, vaddq_f32(_gt, _rt)));
-
-                            int32x4_t _idx = vcvtq_s32_f32(_alpha);
-                            vst1q_s32((int*)idxBuf, _idx);
-                            bufSum32[0] = expLUT[idxBuf[0]];
-                            bufSum32[1] = expLUT[idxBuf[1]];
-                            bufSum32[2] = expLUT[idxBuf[2]];
-                            bufSum32[3] = expLUT[idxBuf[3]];
-                            float32x4_t _explut = vld1q_f32(bufSum32);
-                            bufSum32[0] = expLUT[idxBuf[0]+1];
-                            bufSum32[1] = expLUT[idxBuf[1]+1];
-                            bufSum32[2] = expLUT[idxBuf[2]+1];
-                            bufSum32[3] = expLUT[idxBuf[3]+1];
-                            float32x4_t _explut1 = vld1q_f32(bufSum32);
-
-                            float32x4_t _w = vmulq_f32(_sw, vaddq_f32(_explut, vmulq_f32(_alpha, vsubq_f32(_explut1, _explut))));
-
-                            _b = vmulq_f32(_b, _w);
-                            _g = vmulq_f32(_g, _w);
-                            _r = vmulq_f32(_r, _w);
-
-                            float32x2_t _wb = vpadd_f32(vpadd_f32(vget_low_f32(_w),vget_high_f32(_w)), vpadd_f32(vget_low_f32(_b), vget_high_f32(_b)));
-                            float32x2_t _gr = vpadd_f32(vpadd_f32(vget_low_f32(_g),vget_high_f32(_g)), vpadd_f32(vget_low_f32(_r), vget_high_f32(_r)));
-
-                            _w = vcombine_f32(_wb, _gr);
-                            sum = vaddq_f32(sum, _w);
+                            v_float32x4 _v0 = v_load(sptr_k0);
+                            v_float32x4 _v1 = v_load(sptr_k1);
+                            v_float32x4 _v2 = v_load(sptr_k2);
+                            v_float32x4 _v3 = v_load(sptr_k3);
+                            v_float32x4 _b, _g, _r, _dummy;
+
+                            v_transpose4x4(_v0, _v1, _v2, _v3, _b, _g, _r, _dummy);
+
+                            v_float32x4 _bt = v_abs(_b - _b0);
+                            v_float32x4 _gt = v_abs(_g - _g0);
+                            v_float32x4 _rt = v_abs(_r - _r0);
+                            v_float32x4 _alpha = _scale_index * (_bt + _gt + _rt);
+
+                            v_int32x4 _idx = v_round(_alpha);
+                            v_store((int*)idxBuf, _idx);
+                            v_float32x4 _explut = v_float32x4(expLUT[idxBuf[0]],
+                                expLUT[idxBuf[1]],
+                                expLUT[idxBuf[2]],
+                                expLUT[idxBuf[3]]);
+                            v_float32x4 _explut1 = v_float32x4(expLUT[idxBuf[0] + 1],
+                                expLUT[idxBuf[1] + 1],
+                                expLUT[idxBuf[2] + 1],
+                                expLUT[idxBuf[3] + 1]);
+
+                            v_float32x4 _w = _sw * (_explut + (_alpha * (_explut1 - _explut)));
+
+                            _b *=  _w;
+                            _g *=  _w;
+                            _r *=  _w;
+                            sumw += _w;
+                            sumb += _b;
+                            sumg += _g;
+                            sumr += _r;
                         }
-                        vst1q_f32(bufSum32, sum);
-                        wsum  = bufSum32[0];
-                        sum_b = bufSum32[1];
-                        sum_g = bufSum32[2];
-                        sum_r = bufSum32[3];
+                        v_float32x4 sum4 = v_reduce_sum4(sumw, sumb, sumg, sumr);
+                        float *bufFloat = (float*)idxBuf;
+                        v_store(bufFloat, sum4);
+                        wsum += bufFloat[0];
+                        sum_b += bufFloat[1];
+                        sum_g += bufFloat[2];
+                        sum_r += bufFloat[3];
                     }
-                    #endif
+#endif
 
                     for(; k < maxk; k++ )
                     {
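A recurring change in the bilateral hunks: the SSE3 code reduced horizontally with _mm_hadd_ps cascades on every iteration, whereas the universal-intrinsics version keeps four vector accumulators live across the loop and performs a single 4x4 transpose-reduce with v_reduce_sum4 afterwards. A standalone sketch of that idiom (array names are illustrative; the scalar tail is omitted for brevity):

    #include "opencv2/core/hal/intrin.hpp"

    // Illustrative sketch: accumulate four independent sums in registers,
    // then reduce once; out[] receives (sum w, sum b, sum g, sum r).
    static void sum4(const float* w, const float* b, const float* g,
                     const float* r, int n, float out[4])
    {
        cv::v_float32x4 sw = cv::v_setzero_f32(), sb = cv::v_setzero_f32();
        cv::v_float32x4 sg = cv::v_setzero_f32(), sr = cv::v_setzero_f32();
        for (int k = 0; k <= n - 4; k += 4)
        {
            sw += cv::v_load(w + k);
            sb += cv::v_load(b + k);
            sg += cv::v_load(g + k);
            sr += cv::v_load(r + k);
        }
        cv::v_store(out, cv::v_reduce_sum4(sw, sb, sg, sr));
    }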
index c26fe75..c9ea9b8 100644 (file)
@@ -161,16 +161,8 @@ int initUndistortRectifyMapLine_AVX(float* m1f, float* m2f, short* m1, ushort* m
             __u = _mm256_mul_pd(__u, _mm256_set1_pd(INTER_TAB_SIZE));
             __v = _mm256_mul_pd(__v, _mm256_set1_pd(INTER_TAB_SIZE));
 
-            __m128 __u_float = _mm256_cvtpd_ps(__u);
-            __m128 __v_float = _mm256_cvtpd_ps(__v);
-            _mm256_zeroupper();
-            static const __m128 __int_max = _mm_set1_ps((float)(std::numeric_limits<int>::max()));
-            static const __m128 __int_min = _mm_set1_ps((float)(std::numeric_limits<int>::min()));
-            __u_float = _mm_max_ps(_mm_min_ps(__u_float, __int_max), __int_min);
-            __v_float = _mm_max_ps(_mm_min_ps(__v_float, __int_max), __int_min);
-
-            __m128i __iu = _mm_cvtps_epi32(__u_float);
-            __m128i __iv = _mm_cvtps_epi32(__v_float);
+            __m128i __iu = _mm256_cvtpd_epi32(__u);
+            __m128i __iv = _mm256_cvtpd_epi32(__v);
 
             static const __m128i __INTER_TAB_SIZE_m1 = _mm_set1_epi32(INTER_TAB_SIZE - 1);
             __m128i __m2 = _mm_add_epi32(
@@ -192,6 +184,8 @@ int initUndistortRectifyMapLine_AVX(float* m1f, float* m2f, short* m1, ushort* m
         }
     }
 
+    _mm256_zeroupper();
+
     return j;
 }
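The AVX hunk above relies on _mm256_cvtpd_epi32 rounding four doubles directly to 32-bit integers (to nearest even under the default MXCSR mode), which makes the former detour through float and the manual INT_MIN/INT_MAX clamp unnecessary for values known to fit, as the map coordinates here are. The single _mm256_zeroupper() before returning also replaces the per-iteration call. A minimal sketch of the conversion (the function name is illustrative):

    #include <immintrin.h>

    // Illustrative sketch: round four packed doubles straight to four
    // 32-bit integers with one AVX instruction.
    static __m128i round4d(const double v[4])
    {
        return _mm256_cvtpd_epi32(_mm256_loadu_pd(v));
    }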
 
diff --git a/modules/imgproc/test/test_fitellipseAMS.cpp b/modules/imgproc/test/test_fitellipseAMS.cpp
new file mode 100644 (file)
index 0000000..405d1f4
--- /dev/null
@@ -0,0 +1,441 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2016, Itseez, Inc, all rights reserved.
+
+#include "test_precomp.hpp"
+#include <vector>
+#include <cmath>
+
+using namespace cv;
+using namespace std;
+
+TEST(Imgproc_FitEllipseAMS_Issue_1, accuracy) {
+    vector<Point2f>pts;
+    pts.push_back(Point2f(173.41854895999165f, 125.84473135880411f));
+    pts.push_back(Point2f(180.63769498640912f, 130.960006577589f));
+    pts.push_back(Point2f(174.99173759130173f, 137.34265632926764f));
+    pts.push_back(Point2f(170.9044645313217f, 141.68017556480243f));
+    pts.push_back(Point2f(163.48965388499656f, 141.9404438924043f));
+    pts.push_back(Point2f(159.37687818401147f, 148.60835331594876f));
+    pts.push_back(Point2f(150.38917629356735f, 155.68825577720446f));
+    pts.push_back(Point2f(147.16319653316862f, 157.06039984963923f));
+    pts.push_back(Point2f(141.73118707843207f, 157.2570155198414f));
+    pts.push_back(Point2f(130.61569602948597f, 159.40742182929364f));
+    pts.push_back(Point2f(127.00573042229027f, 161.34430232187867f));
+    pts.push_back(Point2f(120.49383815053747f, 163.72610883128334f));
+    pts.push_back(Point2f(114.62383760040998f, 162.6788666385239f));
+    pts.push_back(Point2f(108.84871269183333f, 161.90597054388132f));
+    pts.push_back(Point2f(103.04574087829076f, 167.44352944383985f));
+    pts.push_back(Point2f(96.31623870161255f, 163.71641295746116f));
+    pts.push_back(Point2f(89.86174417295126f, 157.2967811253635f));
+    pts.push_back(Point2f(84.27940674801192f, 168.6331304010667f));
+    pts.push_back(Point2f(76.61995117937661f, 159.4445412678832f));
+    pts.push_back(Point2f(72.22526316142418f, 154.60770776728293f));
+    pts.push_back(Point2f(64.97742405067658f, 152.3687174339018f));
+    pts.push_back(Point2f(58.34612797237003f, 155.61116802371583f));
+    pts.push_back(Point2f(55.59089117268539f, 148.56245696566418f));
+    pts.push_back(Point2f(45.22711195983706f, 145.6713241271927f));
+    pts.push_back(Point2f(40.090542298840234f, 142.36141304004002f));
+    pts.push_back(Point2f(31.788996807277414f, 136.26164877915585f));
+    pts.push_back(Point2f(27.27613006088805f, 137.46860042141503f));
+    pts.push_back(Point2f(23.972392188502226f, 129.17993872328594f));
+    pts.push_back(Point2f(20.688046711616977f, 121.52750840733087f));
+    pts.push_back(Point2f(14.635115184257643f, 115.36942800110485f));
+    pts.push_back(Point2f(14.850919318756809f, 109.43609786936987f));
+    pts.push_back(Point2f(7.476847697758103f, 102.67657265589285f));
+    pts.push_back(Point2f(1.8896944088091914f, 95.78878215565676f));
+    pts.push_back(Point2f(1.731997022935417f, 88.17674033990495f));
+    pts.push_back(Point2f(1.6780841363402033f, 80.65581939883002f));
+    pts.push_back(Point2f(0.035330281415411946f, 73.1088693846768f));
+    pts.push_back(Point2f(0.14652518786238033f, 65.42769523404296f));
+    pts.push_back(Point2f(6.99914645302843f, 58.436451064804245f));
+    pts.push_back(Point2f(6.719616410428614f, 50.15263031354927f));
+    pts.push_back(Point2f(5.122267598477748f, 46.03603214691343f));
+
+    bool AMSGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseAMSTrue = cv::RotatedRect(Point2f(94.4037f, 84.743f), Size2f(190.614f, 153.543f), 19.832f);
+    RotatedRect     ellipseAMSTest = fitEllipseAMS(pts);
+    Point2f         ellipseAMSTrueVertices[4];
+    Point2f         ellipseAMSTestVertices[4];
+    ellipseAMSTest.points(ellipseAMSTestVertices);
+    ellipseAMSTrue.points(ellipseAMSTrueVertices);
+    float AMSDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        AMSDiff += std::sqrt(d);
+    }
+    AMSGoodQ = AMSDiff < tol;
+
+    EXPECT_TRUE(AMSGoodQ);
+}
+
+TEST(Imgproc_FitEllipseAMS_Issue_2, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(436.59985753246326f, 99.52113368023126f));
+    pts.push_back(Point2f(454.40214161915856f, 160.47565296546912f));
+    pts.push_back(Point2f(406.01996690372687f, 215.41999534561575f));
+    pts.push_back(Point2f(362.8738685722881f, 262.1842668997318f));
+    pts.push_back(Point2f(300.72864073265407f, 290.8182699272777f));
+    pts.push_back(Point2f(247.62963883830972f, 311.383137106776f));
+    pts.push_back(Point2f(194.15394659099445f, 313.30260991427565f));
+    pts.push_back(Point2f(138.934393338296f, 310.50203123324223f));
+    pts.push_back(Point2f(91.66999301197541f, 300.57303988670515f));
+    pts.push_back(Point2f(28.286233855826133f, 268.0670159317756f));
+
+    bool AMSGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseAMSTrue = cv::RotatedRect(Point2f(223.917f, 169.701f), Size2f(456.628f, 277.809f), -12.6378f);
+    RotatedRect     ellipseAMSTest = fitEllipseAMS(pts);
+    Point2f         ellipseAMSTrueVertices[4];
+    Point2f         ellipseAMSTestVertices[4];
+    ellipseAMSTest.points(ellipseAMSTestVertices);
+    ellipseAMSTrue.points(ellipseAMSTrueVertices);
+    float AMSDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        AMSDiff += std::sqrt(d);
+    }
+    AMSGoodQ = AMSDiff < tol;
+
+    EXPECT_TRUE(AMSGoodQ);
+}
+
+
+TEST(Imgproc_FitEllipseAMS_Issue_3, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(459.59217920219083f, 480.1054989283611f));
+    pts.push_back(Point2f(427.2759071813645f, 501.82653857689616f));
+    pts.push_back(Point2f(388.35145730295574f, 520.9488690267101f));
+    pts.push_back(Point2f(349.53248668650656f, 522.9153107979839f));
+    pts.push_back(Point2f(309.56018996762094f, 527.449631776843f));
+    pts.push_back(Point2f(272.07480726768665f, 508.12367135706165f));
+    pts.push_back(Point2f(234.69230939247115f, 519.8943877180591f));
+    pts.push_back(Point2f(201.65185545142472f, 509.47870288702813f));
+    pts.push_back(Point2f(169.37222144138462f, 498.2681549419808f));
+    pts.push_back(Point2f(147.96233740677815f, 467.0923094529034f));
+    pts.push_back(Point2f(109.68331701139209f, 433.39069422941986f));
+    pts.push_back(Point2f(81.95454413977822f, 397.34325168750087f));
+    pts.push_back(Point2f(63.74923800767195f, 371.939105294963f));
+    pts.push_back(Point2f(39.966434417279885f, 329.9581349942296f));
+    pts.push_back(Point2f(21.581668415402532f, 292.6692716276865f));
+    pts.push_back(Point2f(13.687334926511767f, 248.91164234903772f));
+    pts.push_back(Point2f(0.0f, 201.25693715845716f));
+    pts.push_back(Point2f(3.90259455356599f, 155.68155247210575f));
+    pts.push_back(Point2f(39.683930802331844f, 110.26290871953987f));
+    pts.push_back(Point2f(47.85826684019932f, 70.82454140948524f));
+
+    bool AMSGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseAMSTrue = cv::RotatedRect(Point2f(266.796f, 260.167f), Size2f(580.374f, 469.465f), 50.3961f);
+    RotatedRect     ellipseAMSTest = fitEllipseAMS(pts);
+    Point2f         ellipseAMSTrueVertices[4];
+    Point2f         ellipseAMSTestVertices[4];
+    ellipseAMSTest.points(ellipseAMSTestVertices);
+    ellipseAMSTrue.points(ellipseAMSTrueVertices);
+    float AMSDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        AMSDiff += std::sqrt(d);
+    }
+    AMSGoodQ = AMSDiff < tol;
+
+    EXPECT_TRUE(AMSGoodQ);
+}
+
+TEST(Imgproc_FitEllipseAMS_Issue_4, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(461.1761758124861f, 79.55196261616746f));
+    pts.push_back(Point2f(470.5034888757249f, 100.56760245239015f));
+    pts.push_back(Point2f(470.7814479849749f, 127.45783922150272f));
+    pts.push_back(Point2f(465.214384653262f, 157.51792078285405f));
+    pts.push_back(Point2f(465.3739691861813f, 185.89204350118942f));
+    pts.push_back(Point2f(443.36043162278366f, 214.43399982709002f));
+    pts.push_back(Point2f(435.04682693174095f, 239.2657073987589f));
+    pts.push_back(Point2f(444.48553588292697f, 262.0816619678671f));
+    pts.push_back(Point2f(407.1290185495328f, 285.07828783776347f));
+    pts.push_back(Point2f(397.71436554935804f, 304.782713567108f));
+    pts.push_back(Point2f(391.65678619785854f, 323.6809382153118f));
+    pts.push_back(Point2f(366.3904205781036f, 328.09416679736563f));
+    pts.push_back(Point2f(341.7656517790918f, 346.9672607008338f));
+    pts.push_back(Point2f(335.8021864809171f, 358.22416661090296f));
+    pts.push_back(Point2f(313.29224574204227f, 373.3267160317279f));
+    pts.push_back(Point2f(291.121216115417f, 377.3339312050791f));
+    pts.push_back(Point2f(284.20367595990547f, 389.5930108233698f));
+    pts.push_back(Point2f(270.9682061106809f, 388.4352006517971f));
+    pts.push_back(Point2f(253.10188273008825f, 392.35120876055373f));
+    pts.push_back(Point2f(234.2306946938868f, 407.0773705761117f));
+    pts.push_back(Point2f(217.0544384092144f, 407.54850609237235f));
+    pts.push_back(Point2f(198.40910966657933f, 423.7008860314684f));
+    pts.push_back(Point2f(175.47011114845057f, 420.4223434173364f));
+    pts.push_back(Point2f(154.92083551695902f, 418.5288198459268f));
+    pts.push_back(Point2f(136.52988517939698f, 417.8311217226818f));
+    pts.push_back(Point2f(114.74657291069317f, 410.1534699388714f));
+    pts.push_back(Point2f(78.9220388330042f, 397.6266608135022f));
+    pts.push_back(Point2f(76.82658673144391f, 404.27399269891055f));
+    pts.push_back(Point2f(50.953595435605116f, 386.3824077178053f));
+    pts.push_back(Point2f(43.603489077456985f, 368.7894972436907f));
+    pts.push_back(Point2f(19.37402592752713f, 343.3511017547511f));
+    pts.push_back(Point2f(8.714663367287343f, 322.2148323327599f));
+    pts.push_back(Point2f(0.0f, 288.7836318007535f));
+    pts.push_back(Point2f(3.98686689837605f, 263.1748167870333f));
+    pts.push_back(Point2f(9.536389714519785f, 233.02995195684738f));
+    pts.push_back(Point2f(17.83246556512455f, 205.6536519851621f));
+    pts.push_back(Point2f(33.00593702846919f, 180.52628138608327f));
+    pts.push_back(Point2f(41.572400996463394f, 153.95185568689314f));
+    pts.push_back(Point2f(54.55733659450332f, 136.54322891729444f));
+    pts.push_back(Point2f(78.60990563833005f, 112.76538180538182f));
+
+    bool AMSGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseAMSTrue = cv::RotatedRect(Point2f(237.108f, 207.32f), Size2f(517.287f, 357.591f), -36.3653f);
+    RotatedRect     ellipseAMSTest = fitEllipseAMS(pts);
+    Point2f         ellipseAMSTrueVertices[4];
+    Point2f         ellipseAMSTestVertices[4];
+    ellipseAMSTest.points(ellipseAMSTestVertices);
+    ellipseAMSTrue.points(ellipseAMSTrueVertices);
+    float AMSDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        AMSDiff += std::sqrt(d);
+    }
+    AMSGoodQ = AMSDiff < tol;
+
+    EXPECT_TRUE(AMSGoodQ);
+}
+
+
+
+TEST(Imgproc_FitEllipseAMS_Issue_5, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(509.60609444351917f, 484.8233016998119f));
+    pts.push_back(Point2f(508.55357451809846f, 498.61004779125176f));
+    pts.push_back(Point2f(495.59325478416525f, 507.9238702677585f));
+    pts.push_back(Point2f(455.32905012177747f, 517.7518674113691f));
+    pts.push_back(Point2f(461.24821761238667f, 524.2115477440211f));
+    pts.push_back(Point2f(438.8983455906825f, 528.424911702069f));
+    pts.push_back(Point2f(425.9259699875303f, 532.5700430134499f));
+    pts.push_back(Point2f(405.77496728300616f, 535.7295008444993f));
+    pts.push_back(Point2f(384.31968113982475f, 536.3076260371831f));
+    pts.push_back(Point2f(381.5356536818977f, 540.183355729414f));
+    pts.push_back(Point2f(378.2530503455792f, 540.2871855284832f));
+    pts.push_back(Point2f(357.7242088314752f, 543.473075733281f));
+    pts.push_back(Point2f(339.27871831324853f, 541.2099003613087f));
+    pts.push_back(Point2f(339.22481874867435f, 541.1105421426018f));
+    pts.push_back(Point2f(331.50337377509396f, 539.7296050163102f));
+    pts.push_back(Point2f(317.8306501537862f, 540.9077275195326f));
+    pts.push_back(Point2f(304.9192648323086f, 541.3434792768918f));
+    pts.push_back(Point2f(297.33855427908617f, 543.0590309600501f));
+    pts.push_back(Point2f(288.95330515997694f, 543.8756702506837f));
+    pts.push_back(Point2f(278.5850913122515f, 538.1343888329859f));
+    pts.push_back(Point2f(266.05355938101724f, 538.4115695907074f));
+    pts.push_back(Point2f(255.30186994366096f, 534.2459272411796f));
+    pts.push_back(Point2f(238.52054973466758f, 537.5007401480628f));
+    pts.push_back(Point2f(228.444463024996f, 533.8992361116678f));
+    pts.push_back(Point2f(217.8111623149833f, 538.2269193558991f));
+    pts.push_back(Point2f(209.43502138981037f, 532.8057062984569f));
+    pts.push_back(Point2f(193.33570716763276f, 527.2038128630041f));
+    pts.push_back(Point2f(172.66725340039625f, 526.4020881005537f));
+    pts.push_back(Point2f(158.33654199771337f, 525.2093856704676f));
+    pts.push_back(Point2f(148.65905485249067f, 521.0146762179431f));
+    pts.push_back(Point2f(147.6615365176719f, 517.4315201992808f));
+    pts.push_back(Point2f(122.43568509949394f, 514.2089723387337f));
+    pts.push_back(Point2f(110.88482982039073f, 509.14004840857046f));
+    pts.push_back(Point2f(107.10516681523065f, 502.49943180234266f));
+    pts.push_back(Point2f(82.66611013934804f, 494.0581153893113f));
+    pts.push_back(Point2f(63.573319848965966f, 485.6772487054385f));
+    pts.push_back(Point2f(47.65729058071245f, 475.4468806518075f));
+    pts.push_back(Point2f(19.96819458379347f, 463.98285210241943f));
+    pts.push_back(Point2f(27.855803175234342f, 450.2298664426336f));
+    pts.push_back(Point2f(12.832198085636549f, 435.6317753810441f));
+
+    bool AMSGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseAMSTrue = cv::RotatedRect(Point2f(265.252f, 451.597f), Size2f(503.386f, 174.674f), 5.31814f);
+    RotatedRect     ellipseAMSTest = fitEllipseAMS(pts);
+    Point2f         ellipseAMSTrueVertices[4];
+    Point2f         ellipseAMSTestVertices[4];
+    ellipseAMSTest.points(ellipseAMSTestVertices);
+    ellipseAMSTrue.points(ellipseAMSTrueVertices);
+    float AMSDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        AMSDiff += std::sqrt(d);
+    }
+    AMSGoodQ = AMSDiff < tol;
+
+    EXPECT_TRUE(AMSGoodQ);
+}
+
+TEST(Imgproc_FitEllipseAMS_Issue_6, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(414.90156479295905f, 29.063453659930833f));
+    pts.push_back(Point2f(393.79576036337977f, 58.59512774879134f));
+    pts.push_back(Point2f(387.9100725249931f, 94.65067695657254f));
+    pts.push_back(Point2f(351.6987114318621f, 124.6049267560123f));
+    pts.push_back(Point2f(335.3270519942532f, 154.52182750730412f));
+    pts.push_back(Point2f(329.2955843262556f, 179.38031343427303f));
+    pts.push_back(Point2f(322.7316812937696f, 201.88774427737036f));
+    pts.push_back(Point2f(301.48326350826585f, 217.63331351026562f));
+    pts.push_back(Point2f(287.4603938315088f, 228.68790184154113f));
+    pts.push_back(Point2f(273.36617750656023f, 234.48397257849905f));
+    pts.push_back(Point2f(270.7787206270782f, 242.85279436204632f));
+    pts.push_back(Point2f(268.6973828073692f, 246.10891460870312f));
+    pts.push_back(Point2f(261.60715070464255f, 252.65744793902192f));
+    pts.push_back(Point2f(262.9041824871923f, 257.1813047575656f));
+    pts.push_back(Point2f(263.3210079177046f, 260.0532193246593f));
+    pts.push_back(Point2f(248.49568488533242f, 264.56723557175013f));
+    pts.push_back(Point2f(245.4134174127509f, 264.87259401292f));
+    pts.push_back(Point2f(244.73208618171216f, 272.32307359830884f));
+    pts.push_back(Point2f(232.82093196087555f, 272.0239734764616f));
+    pts.push_back(Point2f(235.28539413113458f, 276.8668447478244f));
+    pts.push_back(Point2f(231.9766571511147f, 277.71179872893083f));
+    pts.push_back(Point2f(227.23880706209866f, 284.5588878789101f));
+    pts.push_back(Point2f(222.53202223537826f, 282.2293154479012f));
+    pts.push_back(Point2f(217.27525654729595f, 297.42961148365725f));
+    pts.push_back(Point2f(212.19490057230672f, 294.5344078014253f));
+    pts.push_back(Point2f(207.47417472945446f, 301.72230412668307f));
+    pts.push_back(Point2f(202.11143229969164f, 298.8588627545512f));
+    pts.push_back(Point2f(196.62967096845824f, 309.39738607353223f));
+    pts.push_back(Point2f(190.37809841992106f, 318.3250479151242f));
+    pts.push_back(Point2f(183.1296129732803f, 322.35242231955453f));
+    pts.push_back(Point2f(171.58530535265993f, 330.4981441404153f));
+    pts.push_back(Point2f(160.40092880652247f, 337.47275990208226f));
+    pts.push_back(Point2f(149.44888762618092f, 343.42296086656717f));
+    pts.push_back(Point2f(139.7923528305302f, 353.4821948045352f));
+    pts.push_back(Point2f(121.08414969113318f, 359.7010225709457f));
+    pts.push_back(Point2f(100.10629739219641f, 375.3155744055458f));
+    pts.push_back(Point2f(78.15715630786733f, 389.0311284319413f));
+    pts.push_back(Point2f(51.22820988075294f, 396.98646504159547f));
+    pts.push_back(Point2f(30.71132492338431f, 402.85098740402844f));
+    pts.push_back(Point2f(10.994737323179852f, 394.6764602972333f));
+
+    bool AMSGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseAMSTrue = cv::RotatedRect(Point2f(192.467f, 204.404f), Size2f(551.397f, 165.068f), 136.913f);
+    RotatedRect     ellipseAMSTest = fitEllipseAMS(pts);
+    Point2f         ellipseAMSTrueVertices[4];
+    Point2f         ellipseAMSTestVertices[4];
+    ellipseAMSTest.points(ellipseAMSTestVertices);
+    ellipseAMSTrue.points(ellipseAMSTrueVertices);
+    float AMSDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        AMSDiff += std::sqrt(d);
+    }
+    AMSGoodQ = AMSDiff < tol;
+
+    EXPECT_TRUE(AMSGoodQ);
+}
+
+TEST(Imgproc_FitEllipseAMS_Issue_7, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(386.7497806918209f, 119.55623710363142f));
+    pts.push_back(Point2f(399.0712613744503f, 132.61095972401034f));
+    pts.push_back(Point2f(400.3582576852657f, 146.71942033652573f));
+    pts.push_back(Point2f(383.31046706707906f, 160.13631428164982f));
+    pts.push_back(Point2f(387.1626582455823f, 173.82700569763574f));
+    pts.push_back(Point2f(378.88843308401425f, 186.10333319745317f));
+    pts.push_back(Point2f(367.55061701208f, 201.41492900400164f));
+    pts.push_back(Point2f(360.3254967185148f, 209.03834085076022f));
+    pts.push_back(Point2f(346.2645164278429f, 222.03214282040395f));
+    pts.push_back(Point2f(342.3483403634167f, 230.58290419787073f));
+    pts.push_back(Point2f(326.2900969991908f, 240.23679566682756f));
+    pts.push_back(Point2f(324.5622396580625f, 249.56961396707823f));
+    pts.push_back(Point2f(304.23417130914095f, 259.6693711280021f));
+    pts.push_back(Point2f(295.54035697534675f, 270.82284542557704f));
+    pts.push_back(Point2f(291.7403057147348f, 276.1536825048371f));
+    pts.push_back(Point2f(269.19344116558665f, 287.1705579044651f));
+    pts.push_back(Point2f(256.5350613899267f, 274.91264707500943f));
+    pts.push_back(Point2f(245.93644351417183f, 286.12398028743064f));
+    pts.push_back(Point2f(232.40892420943732f, 282.73986583867065f));
+    pts.push_back(Point2f(216.17957969101082f, 293.22229708237705f));
+    pts.push_back(Point2f(205.66843722622573f, 295.7032575625158f));
+    pts.push_back(Point2f(192.219969335765f, 302.6968969534755f));
+    pts.push_back(Point2f(178.37758801730416f, 295.56656776633287f));
+    pts.push_back(Point2f(167.60089103756644f, 301.4629292267722f));
+    pts.push_back(Point2f(157.44802813915317f, 298.90830855734504f));
+    pts.push_back(Point2f(138.44311818820313f, 293.951927187897f));
+    pts.push_back(Point2f(128.92747660038592f, 291.4122695492978f));
+    pts.push_back(Point2f(119.75160909865994f, 282.5809454721714f));
+    pts.push_back(Point2f(98.48443737042328f, 290.39938776333247f));
+    pts.push_back(Point2f(88.05275635126131f, 280.11156058895745f));
+    pts.push_back(Point2f(82.45799026448167f, 271.46668468419773f));
+    pts.push_back(Point2f(68.04031962064084f, 267.8136468580707f));
+    pts.push_back(Point2f(58.99967170878713f, 263.8859310392943f));
+    pts.push_back(Point2f(41.256097220823484f, 260.6041605773932f));
+    pts.push_back(Point2f(40.66198797608645f, 246.64973068177196f));
+    pts.push_back(Point2f(31.085484380646008f, 239.28615601336074f));
+    pts.push_back(Point2f(24.069417111444253f, 225.2228746297288f));
+    pts.push_back(Point2f(22.10122953275156f, 212.75509683149195f));
+    pts.push_back(Point2f(9.929991244497518f, 203.20662088477752f));
+    pts.push_back(Point2f(0.0f, 190.04891498441148f));
+
+    bool AMSGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseAMSTrue = cv::RotatedRect(Point2f(197.292f, 134.64f), Size2f(401.092f, 320.051f), 165.429f);
+    RotatedRect     ellipseAMSTest = fitEllipseAMS(pts);
+    Point2f         ellipseAMSTrueVertices[4];
+    Point2f         ellipseAMSTestVertices[4];
+    ellipseAMSTest.points(ellipseAMSTestVertices);
+    ellipseAMSTrue.points(ellipseAMSTrueVertices);
+    float AMSDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseAMSTrueVertices[i] - ellipseAMSTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        AMSDiff += std::sqrt(d);
+    }
+    AMSGoodQ = AMSDiff < tol;
+
+    EXPECT_TRUE(AMSGoodQ);
+}
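Each test above (and in the fitEllipseDirect file that follows) scores the fit with the same metric: for every vertex of the reference RotatedRect, take the distance to the nearest vertex of the fitted RotatedRect, and require the sum of those four distances to stay below tol. A standalone sketch of that metric (the helper name is illustrative):

    #include <opencv2/imgproc.hpp>
    #include <algorithm>
    #include <cmath>
    #include <limits>

    static float vertexMatchError(const cv::RotatedRect& truth, const cv::RotatedRect& fit)
    {
        cv::Point2f tv[4], fv[4];
        truth.points(tv);
        fit.points(fv);
        float total = 0.f;
        for (int i = 0; i < 4; i++) {
            float best = std::numeric_limits<float>::max();
            for (int j = 0; j < 4; j++) {   // squared distance to each fitted vertex
                cv::Point2f d = tv[i] - fv[j];
                best = std::min(best, d.x * d.x + d.y * d.y);
            }
            total += std::sqrt(best);       // nearest-vertex distance
        }
        return total;                       // the tests assert total < 0.01f
    }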
diff --git a/modules/imgproc/test/test_fitellipseDirect.cpp b/modules/imgproc/test/test_fitellipseDirect.cpp
new file mode 100644 (file)
index 0000000..b183673
--- /dev/null
@@ -0,0 +1,442 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2016, Itseez, Inc, all rights reserved.
+
+#include "test_precomp.hpp"
+#include <vector>
+#include <cmath>
+
+using namespace cv;
+using namespace std;
+
+
+TEST(Imgproc_FitEllipseDirect_Issue_1, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(173.41854895999165f, 125.84473135880411f));
+    pts.push_back(Point2f(180.63769498640912f, 130.960006577589f));
+    pts.push_back(Point2f(174.99173759130173f, 137.34265632926764f));
+    pts.push_back(Point2f(170.9044645313217f, 141.68017556480243f));
+    pts.push_back(Point2f(163.48965388499656f, 141.9404438924043f));
+    pts.push_back(Point2f(159.37687818401147f, 148.60835331594876f));
+    pts.push_back(Point2f(150.38917629356735f, 155.68825577720446f));
+    pts.push_back(Point2f(147.16319653316862f, 157.06039984963923f));
+    pts.push_back(Point2f(141.73118707843207f, 157.2570155198414f));
+    pts.push_back(Point2f(130.61569602948597f, 159.40742182929364f));
+    pts.push_back(Point2f(127.00573042229027f, 161.34430232187867f));
+    pts.push_back(Point2f(120.49383815053747f, 163.72610883128334f));
+    pts.push_back(Point2f(114.62383760040998f, 162.6788666385239f));
+    pts.push_back(Point2f(108.84871269183333f, 161.90597054388132f));
+    pts.push_back(Point2f(103.04574087829076f, 167.44352944383985f));
+    pts.push_back(Point2f(96.31623870161255f, 163.71641295746116f));
+    pts.push_back(Point2f(89.86174417295126f, 157.2967811253635f));
+    pts.push_back(Point2f(84.27940674801192f, 168.6331304010667f));
+    pts.push_back(Point2f(76.61995117937661f, 159.4445412678832f));
+    pts.push_back(Point2f(72.22526316142418f, 154.60770776728293f));
+    pts.push_back(Point2f(64.97742405067658f, 152.3687174339018f));
+    pts.push_back(Point2f(58.34612797237003f, 155.61116802371583f));
+    pts.push_back(Point2f(55.59089117268539f, 148.56245696566418f));
+    pts.push_back(Point2f(45.22711195983706f, 145.6713241271927f));
+    pts.push_back(Point2f(40.090542298840234f, 142.36141304004002f));
+    pts.push_back(Point2f(31.788996807277414f, 136.26164877915585f));
+    pts.push_back(Point2f(27.27613006088805f, 137.46860042141503f));
+    pts.push_back(Point2f(23.972392188502226f, 129.17993872328594f));
+    pts.push_back(Point2f(20.688046711616977f, 121.52750840733087f));
+    pts.push_back(Point2f(14.635115184257643f, 115.36942800110485f));
+    pts.push_back(Point2f(14.850919318756809f, 109.43609786936987f));
+    pts.push_back(Point2f(7.476847697758103f, 102.67657265589285f));
+    pts.push_back(Point2f(1.8896944088091914f, 95.78878215565676f));
+    pts.push_back(Point2f(1.731997022935417f, 88.17674033990495f));
+    pts.push_back(Point2f(1.6780841363402033f, 80.65581939883002f));
+    pts.push_back(Point2f(0.035330281415411946f, 73.1088693846768f));
+    pts.push_back(Point2f(0.14652518786238033f, 65.42769523404296f));
+    pts.push_back(Point2f(6.99914645302843f, 58.436451064804245f));
+    pts.push_back(Point2f(6.719616410428614f, 50.15263031354927f));
+    pts.push_back(Point2f(5.122267598477748f, 46.03603214691343f));
+
+    bool directGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseDirectTrue = cv::RotatedRect(Point2f(91.3256f, 90.4668f), Size2f(187.211f, 140.031f), 21.5808f);
+    RotatedRect     ellipseDirectTest = fitEllipseDirect(pts);
+    Point2f         ellipseDirectTrueVertices[4];
+    Point2f         ellipseDirectTestVertices[4];
+    ellipseDirectTest.points(ellipseDirectTestVertices);
+    ellipseDirectTrue.points(ellipseDirectTrueVertices);
+    float directDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        directDiff += std::sqrt(d);
+    }
+    directGoodQ = directDiff < tol;
+
+    EXPECT_TRUE(directGoodQ);
+}
+
+TEST(Imgproc_FitEllipseDirect_Issue_2, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(436.59985753246326f, 99.52113368023126f));
+    pts.push_back(Point2f(454.40214161915856f, 160.47565296546912f));
+    pts.push_back(Point2f(406.01996690372687f, 215.41999534561575f));
+    pts.push_back(Point2f(362.8738685722881f, 262.1842668997318f));
+    pts.push_back(Point2f(300.72864073265407f, 290.8182699272777f));
+    pts.push_back(Point2f(247.62963883830972f, 311.383137106776f));
+    pts.push_back(Point2f(194.15394659099445f, 313.30260991427565f));
+    pts.push_back(Point2f(138.934393338296f, 310.50203123324223f));
+    pts.push_back(Point2f(91.66999301197541f, 300.57303988670515f));
+    pts.push_back(Point2f(28.286233855826133f, 268.0670159317756f));
+
+    bool directGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseDirectTrue = cv::RotatedRect(Point2f(228.232f, 174.879f), Size2f(450.68f, 265.556f), 166.181f);
+    RotatedRect     ellipseDirectTest = fitEllipseDirect(pts);
+    Point2f         ellipseDirectTrueVertices[4];
+    Point2f         ellipseDirectTestVertices[4];
+    ellipseDirectTest.points(ellipseDirectTestVertices);
+    ellipseDirectTrue.points(ellipseDirectTrueVertices);
+    float directDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        directDiff += std::sqrt(d);
+    }
+    directGoodQ = directDiff < tol;
+
+    EXPECT_TRUE(directGoodQ);
+}
+
+
+TEST(Imgproc_FitEllipseDirect_Issue_3, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(459.59217920219083f, 480.1054989283611f));
+    pts.push_back(Point2f(427.2759071813645f, 501.82653857689616f));
+    pts.push_back(Point2f(388.35145730295574f, 520.9488690267101f));
+    pts.push_back(Point2f(349.53248668650656f, 522.9153107979839f));
+    pts.push_back(Point2f(309.56018996762094f, 527.449631776843f));
+    pts.push_back(Point2f(272.07480726768665f, 508.12367135706165f));
+    pts.push_back(Point2f(234.69230939247115f, 519.8943877180591f));
+    pts.push_back(Point2f(201.65185545142472f, 509.47870288702813f));
+    pts.push_back(Point2f(169.37222144138462f, 498.2681549419808f));
+    pts.push_back(Point2f(147.96233740677815f, 467.0923094529034f));
+    pts.push_back(Point2f(109.68331701139209f, 433.39069422941986f));
+    pts.push_back(Point2f(81.95454413977822f, 397.34325168750087f));
+    pts.push_back(Point2f(63.74923800767195f, 371.939105294963f));
+    pts.push_back(Point2f(39.966434417279885f, 329.9581349942296f));
+    pts.push_back(Point2f(21.581668415402532f, 292.6692716276865f));
+    pts.push_back(Point2f(13.687334926511767f, 248.91164234903772f));
+    pts.push_back(Point2f(0.0f, 201.25693715845716f));
+    pts.push_back(Point2f(3.90259455356599f, 155.68155247210575f));
+    pts.push_back(Point2f(39.683930802331844f, 110.26290871953987f));
+    pts.push_back(Point2f(47.85826684019932f, 70.82454140948524f));
+
+    bool directGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseDirectTrue = cv::RotatedRect(Point2f(255.326f, 272.626f), Size2f(570.999f, 434.23f), 49.0265f);
+    RotatedRect     ellipseDirectTest = fitEllipseDirect(pts);
+    Point2f         ellipseDirectTrueVertices[4];
+    Point2f         ellipseDirectTestVertices[4];
+    ellipseDirectTest.points(ellipseDirectTestVertices);
+    ellipseDirectTrue.points(ellipseDirectTrueVertices);
+    float directDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        directDiff += std::sqrt(d);
+    }
+    directGoodQ = directDiff < tol;
+
+    EXPECT_TRUE(directGoodQ);
+}
+
+TEST(Imgproc_FitEllipseDirect_Issue_4, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(461.1761758124861f, 79.55196261616746f));
+    pts.push_back(Point2f(470.5034888757249f, 100.56760245239015f));
+    pts.push_back(Point2f(470.7814479849749f, 127.45783922150272f));
+    pts.push_back(Point2f(465.214384653262f, 157.51792078285405f));
+    pts.push_back(Point2f(465.3739691861813f, 185.89204350118942f));
+    pts.push_back(Point2f(443.36043162278366f, 214.43399982709002f));
+    pts.push_back(Point2f(435.04682693174095f, 239.2657073987589f));
+    pts.push_back(Point2f(444.48553588292697f, 262.0816619678671f));
+    pts.push_back(Point2f(407.1290185495328f, 285.07828783776347f));
+    pts.push_back(Point2f(397.71436554935804f, 304.782713567108f));
+    pts.push_back(Point2f(391.65678619785854f, 323.6809382153118f));
+    pts.push_back(Point2f(366.3904205781036f, 328.09416679736563f));
+    pts.push_back(Point2f(341.7656517790918f, 346.9672607008338f));
+    pts.push_back(Point2f(335.8021864809171f, 358.22416661090296f));
+    pts.push_back(Point2f(313.29224574204227f, 373.3267160317279f));
+    pts.push_back(Point2f(291.121216115417f, 377.3339312050791f));
+    pts.push_back(Point2f(284.20367595990547f, 389.5930108233698f));
+    pts.push_back(Point2f(270.9682061106809f, 388.4352006517971f));
+    pts.push_back(Point2f(253.10188273008825f, 392.35120876055373f));
+    pts.push_back(Point2f(234.2306946938868f, 407.0773705761117f));
+    pts.push_back(Point2f(217.0544384092144f, 407.54850609237235f));
+    pts.push_back(Point2f(198.40910966657933f, 423.7008860314684f));
+    pts.push_back(Point2f(175.47011114845057f, 420.4223434173364f));
+    pts.push_back(Point2f(154.92083551695902f, 418.5288198459268f));
+    pts.push_back(Point2f(136.52988517939698f, 417.8311217226818f));
+    pts.push_back(Point2f(114.74657291069317f, 410.1534699388714f));
+    pts.push_back(Point2f(78.9220388330042f, 397.6266608135022f));
+    pts.push_back(Point2f(76.82658673144391f, 404.27399269891055f));
+    pts.push_back(Point2f(50.953595435605116f, 386.3824077178053f));
+    pts.push_back(Point2f(43.603489077456985f, 368.7894972436907f));
+    pts.push_back(Point2f(19.37402592752713f, 343.3511017547511f));
+    pts.push_back(Point2f(8.714663367287343f, 322.2148323327599f));
+    pts.push_back(Point2f(0.0f, 288.7836318007535f));
+    pts.push_back(Point2f(3.98686689837605f, 263.1748167870333f));
+    pts.push_back(Point2f(9.536389714519785f, 233.02995195684738f));
+    pts.push_back(Point2f(17.83246556512455f, 205.6536519851621f));
+    pts.push_back(Point2f(33.00593702846919f, 180.52628138608327f));
+    pts.push_back(Point2f(41.572400996463394f, 153.95185568689314f));
+    pts.push_back(Point2f(54.55733659450332f, 136.54322891729444f));
+    pts.push_back(Point2f(78.60990563833005f, 112.76538180538182f));
+
+    bool directGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseDirectTrue = cv::RotatedRect(Point2f(236.836f, 208.089f), Size2f(515.893f, 357.166f), -35.9996f);
+    RotatedRect     ellipseDirectTest = fitEllipseDirect(pts);
+    Point2f         ellipseDirectTrueVertices[4];
+    Point2f         ellipseDirectTestVertices[4];
+    ellipseDirectTest.points(ellipseDirectTestVertices);
+    ellipseDirectTrue.points(ellipseDirectTrueVertices);
+    float directDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        directDiff += std::sqrt(d);
+    }
+    directGoodQ = directDiff < tol;
+
+    EXPECT_TRUE(directGoodQ);
+}
+
+
+
+TEST(Imgproc_FitEllipseDirect_Issue_5, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(509.60609444351917f, 484.8233016998119f));
+    pts.push_back(Point2f(508.55357451809846f, 498.61004779125176f));
+    pts.push_back(Point2f(495.59325478416525f, 507.9238702677585f));
+    pts.push_back(Point2f(455.32905012177747f, 517.7518674113691f));
+    pts.push_back(Point2f(461.24821761238667f, 524.2115477440211f));
+    pts.push_back(Point2f(438.8983455906825f, 528.424911702069f));
+    pts.push_back(Point2f(425.9259699875303f, 532.5700430134499f));
+    pts.push_back(Point2f(405.77496728300616f, 535.7295008444993f));
+    pts.push_back(Point2f(384.31968113982475f, 536.3076260371831f));
+    pts.push_back(Point2f(381.5356536818977f, 540.183355729414f));
+    pts.push_back(Point2f(378.2530503455792f, 540.2871855284832f));
+    pts.push_back(Point2f(357.7242088314752f, 543.473075733281f));
+    pts.push_back(Point2f(339.27871831324853f, 541.2099003613087f));
+    pts.push_back(Point2f(339.22481874867435f, 541.1105421426018f));
+    pts.push_back(Point2f(331.50337377509396f, 539.7296050163102f));
+    pts.push_back(Point2f(317.8306501537862f, 540.9077275195326f));
+    pts.push_back(Point2f(304.9192648323086f, 541.3434792768918f));
+    pts.push_back(Point2f(297.33855427908617f, 543.0590309600501f));
+    pts.push_back(Point2f(288.95330515997694f, 543.8756702506837f));
+    pts.push_back(Point2f(278.5850913122515f, 538.1343888329859f));
+    pts.push_back(Point2f(266.05355938101724f, 538.4115695907074f));
+    pts.push_back(Point2f(255.30186994366096f, 534.2459272411796f));
+    pts.push_back(Point2f(238.52054973466758f, 537.5007401480628f));
+    pts.push_back(Point2f(228.444463024996f, 533.8992361116678f));
+    pts.push_back(Point2f(217.8111623149833f, 538.2269193558991f));
+    pts.push_back(Point2f(209.43502138981037f, 532.8057062984569f));
+    pts.push_back(Point2f(193.33570716763276f, 527.2038128630041f));
+    pts.push_back(Point2f(172.66725340039625f, 526.4020881005537f));
+    pts.push_back(Point2f(158.33654199771337f, 525.2093856704676f));
+    pts.push_back(Point2f(148.65905485249067f, 521.0146762179431f));
+    pts.push_back(Point2f(147.6615365176719f, 517.4315201992808f));
+    pts.push_back(Point2f(122.43568509949394f, 514.2089723387337f));
+    pts.push_back(Point2f(110.88482982039073f, 509.14004840857046f));
+    pts.push_back(Point2f(107.10516681523065f, 502.49943180234266f));
+    pts.push_back(Point2f(82.66611013934804f, 494.0581153893113f));
+    pts.push_back(Point2f(63.573319848965966f, 485.6772487054385f));
+    pts.push_back(Point2f(47.65729058071245f, 475.4468806518075f));
+    pts.push_back(Point2f(19.96819458379347f, 463.98285210241943f));
+    pts.push_back(Point2f(27.855803175234342f, 450.2298664426336f));
+    pts.push_back(Point2f(12.832198085636549f, 435.6317753810441f));
+
+    bool directGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseDirectTrue = cv::RotatedRect(Point2f(264.354f, 457.336f), Size2f(493.728f, 162.9f), 5.36186f);
+    RotatedRect     ellipseDirectTest = fitEllipseDirect(pts);
+    Point2f         ellipseDirectTrueVertices[4];
+    Point2f         ellipseDirectTestVertices[4];
+    ellipseDirectTest.points(ellipseDirectTestVertices);
+    ellipseDirectTrue.points(ellipseDirectTrueVertices);
+    float directDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        directDiff += std::sqrt(d);
+    }
+    directGoodQ = directDiff < tol;
+
+    EXPECT_TRUE(directGoodQ);
+}
+
+TEST(Imgproc_FitEllipseDirect_Issue_6, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(414.90156479295905f, 29.063453659930833f));
+    pts.push_back(Point2f(393.79576036337977f, 58.59512774879134f));
+    pts.push_back(Point2f(387.9100725249931f, 94.65067695657254f));
+    pts.push_back(Point2f(351.6987114318621f, 124.6049267560123f));
+    pts.push_back(Point2f(335.3270519942532f, 154.52182750730412f));
+    pts.push_back(Point2f(329.2955843262556f, 179.38031343427303f));
+    pts.push_back(Point2f(322.7316812937696f, 201.88774427737036f));
+    pts.push_back(Point2f(301.48326350826585f, 217.63331351026562f));
+    pts.push_back(Point2f(287.4603938315088f, 228.68790184154113f));
+    pts.push_back(Point2f(273.36617750656023f, 234.48397257849905f));
+    pts.push_back(Point2f(270.7787206270782f, 242.85279436204632f));
+    pts.push_back(Point2f(268.6973828073692f, 246.10891460870312f));
+    pts.push_back(Point2f(261.60715070464255f, 252.65744793902192f));
+    pts.push_back(Point2f(262.9041824871923f, 257.1813047575656f));
+    pts.push_back(Point2f(263.3210079177046f, 260.0532193246593f));
+    pts.push_back(Point2f(248.49568488533242f, 264.56723557175013f));
+    pts.push_back(Point2f(245.4134174127509f, 264.87259401292f));
+    pts.push_back(Point2f(244.73208618171216f, 272.32307359830884f));
+    pts.push_back(Point2f(232.82093196087555f, 272.0239734764616f));
+    pts.push_back(Point2f(235.28539413113458f, 276.8668447478244f));
+    pts.push_back(Point2f(231.9766571511147f, 277.71179872893083f));
+    pts.push_back(Point2f(227.23880706209866f, 284.5588878789101f));
+    pts.push_back(Point2f(222.53202223537826f, 282.2293154479012f));
+    pts.push_back(Point2f(217.27525654729595f, 297.42961148365725f));
+    pts.push_back(Point2f(212.19490057230672f, 294.5344078014253f));
+    pts.push_back(Point2f(207.47417472945446f, 301.72230412668307f));
+    pts.push_back(Point2f(202.11143229969164f, 298.8588627545512f));
+    pts.push_back(Point2f(196.62967096845824f, 309.39738607353223f));
+    pts.push_back(Point2f(190.37809841992106f, 318.3250479151242f));
+    pts.push_back(Point2f(183.1296129732803f, 322.35242231955453f));
+    pts.push_back(Point2f(171.58530535265993f, 330.4981441404153f));
+    pts.push_back(Point2f(160.40092880652247f, 337.47275990208226f));
+    pts.push_back(Point2f(149.44888762618092f, 343.42296086656717f));
+    pts.push_back(Point2f(139.7923528305302f, 353.4821948045352f));
+    pts.push_back(Point2f(121.08414969113318f, 359.7010225709457f));
+    pts.push_back(Point2f(100.10629739219641f, 375.3155744055458f));
+    pts.push_back(Point2f(78.15715630786733f, 389.0311284319413f));
+    pts.push_back(Point2f(51.22820988075294f, 396.98646504159547f));
+    pts.push_back(Point2f(30.71132492338431f, 402.85098740402844f));
+    pts.push_back(Point2f(10.994737323179852f, 394.6764602972333f));
+
+    bool directGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseDirectTrue = cv::RotatedRect(Point2f(207.145f, 223.308f), Size2f(499.583f, 117.473f), -42.6851f);
+    RotatedRect     ellipseDirectTest = fitEllipseDirect(pts);
+    Point2f         ellipseDirectTrueVertices[4];
+    Point2f         ellipseDirectTestVertices[4];
+    ellipseDirectTest.points(ellipseDirectTestVertices);
+    ellipseDirectTrue.points(ellipseDirectTrueVertices);
+    float directDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        directDiff += std::sqrt(d);
+    }
+    directGoodQ = directDiff < tol;
+
+    EXPECT_TRUE(directGoodQ);
+}
+
+TEST(Imgproc_FitEllipseDirect_Issue_7, accuracy) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(386.7497806918209f, 119.55623710363142f));
+    pts.push_back(Point2f(399.0712613744503f, 132.61095972401034f));
+    pts.push_back(Point2f(400.3582576852657f, 146.71942033652573f));
+    pts.push_back(Point2f(383.31046706707906f, 160.13631428164982f));
+    pts.push_back(Point2f(387.1626582455823f, 173.82700569763574f));
+    pts.push_back(Point2f(378.88843308401425f, 186.10333319745317f));
+    pts.push_back(Point2f(367.55061701208f, 201.41492900400164f));
+    pts.push_back(Point2f(360.3254967185148f, 209.03834085076022f));
+    pts.push_back(Point2f(346.2645164278429f, 222.03214282040395f));
+    pts.push_back(Point2f(342.3483403634167f, 230.58290419787073f));
+    pts.push_back(Point2f(326.2900969991908f, 240.23679566682756f));
+    pts.push_back(Point2f(324.5622396580625f, 249.56961396707823f));
+    pts.push_back(Point2f(304.23417130914095f, 259.6693711280021f));
+    pts.push_back(Point2f(295.54035697534675f, 270.82284542557704f));
+    pts.push_back(Point2f(291.7403057147348f, 276.1536825048371f));
+    pts.push_back(Point2f(269.19344116558665f, 287.1705579044651f));
+    pts.push_back(Point2f(256.5350613899267f, 274.91264707500943f));
+    pts.push_back(Point2f(245.93644351417183f, 286.12398028743064f));
+    pts.push_back(Point2f(232.40892420943732f, 282.73986583867065f));
+    pts.push_back(Point2f(216.17957969101082f, 293.22229708237705f));
+    pts.push_back(Point2f(205.66843722622573f, 295.7032575625158f));
+    pts.push_back(Point2f(192.219969335765f, 302.6968969534755f));
+    pts.push_back(Point2f(178.37758801730416f, 295.56656776633287f));
+    pts.push_back(Point2f(167.60089103756644f, 301.4629292267722f));
+    pts.push_back(Point2f(157.44802813915317f, 298.90830855734504f));
+    pts.push_back(Point2f(138.44311818820313f, 293.951927187897f));
+    pts.push_back(Point2f(128.92747660038592f, 291.4122695492978f));
+    pts.push_back(Point2f(119.75160909865994f, 282.5809454721714f));
+    pts.push_back(Point2f(98.48443737042328f, 290.39938776333247f));
+    pts.push_back(Point2f(88.05275635126131f, 280.11156058895745f));
+    pts.push_back(Point2f(82.45799026448167f, 271.46668468419773f));
+    pts.push_back(Point2f(68.04031962064084f, 267.8136468580707f));
+    pts.push_back(Point2f(58.99967170878713f, 263.8859310392943f));
+    pts.push_back(Point2f(41.256097220823484f, 260.6041605773932f));
+    pts.push_back(Point2f(40.66198797608645f, 246.64973068177196f));
+    pts.push_back(Point2f(31.085484380646008f, 239.28615601336074f));
+    pts.push_back(Point2f(24.069417111444253f, 225.2228746297288f));
+    pts.push_back(Point2f(22.10122953275156f, 212.75509683149195f));
+    pts.push_back(Point2f(9.929991244497518f, 203.20662088477752f));
+    pts.push_back(Point2f(0.0f, 190.04891498441148f));
+
+    bool directGoodQ;
+    float tol = 0.01f;
+
+    RotatedRect     ellipseDirectTrue = cv::RotatedRect(Point2f(199.463f, 150.997f), Size2f(390.341f, 286.01f), -12.9696f);
+    RotatedRect     ellipseDirectTest = fitEllipseDirect(pts);
+    Point2f         ellipseDirectTrueVertices[4];
+    Point2f         ellipseDirectTestVertices[4];
+    ellipseDirectTest.points(ellipseDirectTestVertices);
+    ellipseDirectTrue.points(ellipseDirectTrueVertices);
+    float directDiff = 0.0f;
+    for (size_t i=0; i <=3; i++) {
+        Point2f diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[0];
+        float d = diff.x * diff.x + diff.y * diff.y;
+        for (size_t j=1; j <=3; j++) {
+            diff = ellipseDirectTrueVertices[i] - ellipseDirectTestVertices[j];
+            float dd = diff.x * diff.x + diff.y * diff.y;
+            if(dd<d){d=dd;}
+        }
+        directDiff += std::sqrt(d);
+    }
+    directGoodQ = directDiff < tol;
+
+    EXPECT_TRUE(directGoodQ);
+}
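Both routines exercised by these two test files share the cv::fitEllipse() interface: at least five 2-D points in, a cv::RotatedRect out. A minimal usage sketch:

    #include <opencv2/imgproc.hpp>
    #include <vector>

    void fitAllVariants(const std::vector<cv::Point2f>& pts)   // pts.size() >= 5
    {
        cv::RotatedRect lsq = cv::fitEllipse(pts);        // existing least-squares fit
        cv::RotatedRect ams = cv::fitEllipseAMS(pts);     // Approximate Mean Square (Taubin)
        cv::RotatedRect dir = cv::fitEllipseDirect(pts);  // Direct least squares (Fitzgibbon)
        (void)lsq; (void)ams; (void)dir;                  // silence unused warnings in this sketch
    }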
diff --git a/modules/js/CMakeLists.txt b/modules/js/CMakeLists.txt
new file mode 100644 (file)
index 0000000..4d90f46
--- /dev/null
@@ -0,0 +1,125 @@
+# ----------------------------------------------------------------------------
+#  CMake file for js support
+# ----------------------------------------------------------------------------
+
+# message(STATUS "---------------- Start of JavaScript module ----------------------")
+
+set(the_description "The js bindings")
+set(MODULE_NAME js)
+
+set(OPENCV_JS "opencv.js")
+
+ocv_add_module(${MODULE_NAME} BINDINGS)
+
+# TODO: add emscripten path
+ocv_module_include_directories()
+
+# get list of modules to wrap
+# message(STATUS "Wrapped in ${MODULE_NAME}:")
+set(OPENCV_JS_MODULES)
+foreach(m ${OPENCV_MODULES_BUILD})
+  if (";${OPENCV_MODULE_${m}_WRAPPERS};" MATCHES ";${MODULE_NAME};" AND HAVE_${m})
+    list(APPEND OPENCV_JS_MODULES ${m})
+    # message(STATUS "\t${m}")
+  endif()
+endforeach()
+
+set(opencv_hdrs "")
+foreach(m ${OPENCV_JS_MODULES})
+  list(APPEND opencv_hdrs ${OPENCV_MODULE_${m}_HEADERS})
+endforeach(m)
+
+# header blacklist
+ocv_list_filterout(opencv_hdrs "modules/.*.h$")
+ocv_list_filterout(opencv_hdrs "modules/core/.*/cuda")
+ocv_list_filterout(opencv_hdrs "modules/core/include/opencv2/core/opengl.hpp")
+ocv_list_filterout(opencv_hdrs "modules/core/include/opencv2/core/ocl.hpp")
+ocv_list_filterout(opencv_hdrs "modules/cuda.*")
+ocv_list_filterout(opencv_hdrs "modules/cudev")
+ocv_list_filterout(opencv_hdrs "modules/core/.*/hal/")
+ocv_list_filterout(opencv_hdrs "modules/.*/detection_based_tracker.hpp") # Conditional compilation
+ocv_list_filterout(opencv_hdrs "modules/core/include/opencv2/core/utils/.*")
+
+file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/headers.txt" "${opencv_hdrs}")
+
+set(bindings_cpp "${CMAKE_CURRENT_BINARY_DIR}/bindings.cpp")
+
+set(scripts_hdr_parser "${CMAKE_CURRENT_SOURCE_DIR}/../python/src2/hdr_parser.py")
+
+set(JS_HELPER "${CMAKE_CURRENT_SOURCE_DIR}/src/helpers.js")
+
+add_custom_command(
+   OUTPUT ${bindings_cpp}
+   COMMAND ${PYTHON_DEFAULT_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/src/embindgen.py" ${scripts_hdr_parser} ${bindings_cpp} "${CMAKE_CURRENT_BINARY_DIR}/headers.txt" "${CMAKE_CURRENT_SOURCE_DIR}/src/core_bindings.cpp"
+   DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/core_bindings.cpp
+   DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/embindgen.py
+   DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/templates.py
+   DEPENDS ${scripts_hdr_parser}
+   DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/headers.txt
+   DEPENDS ${opencv_hdrs}
+   DEPENDS ${JS_HELPER})
+
+add_definitions("-std=c++11")
+
+link_libraries(${OPENCV_MODULE_${the_module}_DEPS})
+
+ocv_add_executable(${the_module} ${bindings_cpp})
+
+set_target_properties(${the_module} PROPERTIES COMPILE_FLAGS "-Wno-missing-prototypes")
+
+set_target_properties(${the_module} PROPERTIES LINK_FLAGS "--memory-init-file 0 -s TOTAL_MEMORY=134217728 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME=\"'cv'\" -s DEMANGLE_SUPPORT=1 -s FORCE_FILESYSTEM=1 --use-preload-plugins --bind --post-js ${JS_HELPER} -Wno-missing-prototypes")
+
+# add UMD wrapper
+set(MODULE_JS_PATH "${OpenCV_BINARY_DIR}/bin/${the_module}.js")
+set(OCV_JS_PATH "${OpenCV_BINARY_DIR}/bin/${OPENCV_JS}")
+
+add_custom_command(
+   OUTPUT ${OCV_JS_PATH}
+   COMMAND ${PYTHON_DEFAULT_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/src/make_umd.py" ${MODULE_JS_PATH} "${OCV_JS_PATH}"
+   DEPENDS ${the_module}
+   DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/make_umd.py")
+
+add_custom_target(${OPENCV_JS} ALL
+                  DEPENDS ${OCV_JS_PATH}
+                  DEPENDS ${the_module})
+
+# test
+set(opencv_test_js_bin_dir "${EXECUTABLE_OUTPUT_PATH}")
+set(test_dir ${CMAKE_CURRENT_SOURCE_DIR}/test)
+
+set(opencv_test_js_file_deps "")
+
+# message(STATUS "${opencv_test_js_bin_dir}")
+
+# make sure the build directory exists
+file(MAKE_DIRECTORY "${opencv_test_js_bin_dir}")
+
+# gather and copy specific files for js test
+file(GLOB_RECURSE test_files RELATIVE "${test_dir}" "${test_dir}/*")
+foreach(f ${test_files})
+  # message(STATUS "copy ${test_dir}/${f} ${opencv_test_js_bin_dir}/${f}")
+  add_custom_command(OUTPUT "${opencv_test_js_bin_dir}/${f}"
+                     COMMAND ${CMAKE_COMMAND} -E copy_if_different "${test_dir}/${f}" "${opencv_test_js_bin_dir}/${f}"
+                     DEPENDS "${test_dir}/${f}"
+                     COMMENT "Copying ${f}"
+                    )
+  list(APPEND opencv_test_js_file_deps "${test_dir}/${f}" "${opencv_test_js_bin_dir}/${f}")
+endforeach()
+
+# copy test data
+set(test_data "haarcascade_frontalface_default.xml")
+set(test_data_path "${PROJECT_SOURCE_DIR}/../../data/haarcascades/${test_data}")
+
+add_custom_command(OUTPUT "${opencv_test_js_bin_dir}/${test_data}"
+                   COMMAND ${CMAKE_COMMAND} -E copy_if_different "${test_data_path}" "${opencv_test_js_bin_dir}/${test_data}"
+                   DEPENDS "${test_data_path}"
+                   COMMENT "Copying ${test_data}"
+                  )
+list(APPEND opencv_test_js_file_deps "${test_data_path}" "${opencv_test_js_bin_dir}/${test_data}")
+
+add_custom_target(${PROJECT_NAME}_test ALL
+                  DEPENDS ${OCV_JS_PATH} ${opencv_test_js_file_deps})
+
+unset(MODULE_NAME)
+
+# message(STATUS "---------------- End of JavaScript module ----------------------")
diff --git a/modules/js/src/.eslintrc.json b/modules/js/src/.eslintrc.json
new file mode 100644 (file)
index 0000000..4f97ebc
--- /dev/null
@@ -0,0 +1,16 @@
+{
+  "extends": "google",
+  "parserOptions": {
+    "ecmaVersion": 6
+  },
+  "rules": {
+    "max-len": ["error", 100, {"ignoreUrls": true}],
+    "quotes": ["error", "single"],
+    "indent": ["error", 4, {"ArrayExpression": "first",
+                            "CallExpression": {"arguments": "first"},
+                            "SwitchCase": 1}],
+    "no-var": "off",
+    "prefer-rest-params": "off",
+    "require-jsdoc": "off"
+  }
+}
diff --git a/modules/js/src/core_bindings.cpp b/modules/js/src/core_bindings.cpp
new file mode 100644 (file)
index 0000000..5cd5eb2
--- /dev/null
@@ -0,0 +1,587 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*M///////////////////////////////////////////////////////////////////////////////////////
+// Author: Sajjad Taheri, University of California, Irvine. sajjadt[at]uci[dot]edu
+//
+//                             LICENSE AGREEMENT
+// Copyright (c) 2015 The Regents of the University of California (Regents)
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+// 3. Neither the name of the University nor the
+//    names of its contributors may be used to endorse or promote products
+//    derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//M*/
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/video/tracking.hpp"
+#include "opencv2/video/background_segm.hpp"
+#include "opencv2/objdetect.hpp"
+
+#include <emscripten/bind.h>
+
+using namespace emscripten;
+using namespace cv;
+
+namespace binding_utils
+{
+    template<typename T>
+    emscripten::val matData(const cv::Mat& mat)
+    {
+        return emscripten::val(emscripten::memory_view<T>((mat.total()*mat.elemSize())/sizeof(T),
+                               (T*)mat.data));
+    }
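+
+    // Note: the memory_view returned above aliases the Mat's buffer on the
+    // Emscripten heap rather than copying it. A JS-side sketch (assuming the
+    // compiled module is loaded as `cv`):
+    //     let m = new cv.Mat(2, 2, cv.CV_32FC1);
+    //     m.data32F[0] = 1.0;   // writes straight into the Mat's pixels
+    // The view is invalidated if the Mat's buffer is reallocated.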
+
+    template<typename T>
+    emscripten::val matPtr(const cv::Mat& mat, int i)
+    {
+        return emscripten::val(emscripten::memory_view<T>(mat.step1(0), mat.ptr<T>(i)));
+    }
+
+    template<typename T>
+    emscripten::val matPtr(const cv::Mat& mat, int i, int j)
+    {
+        return emscripten::val(emscripten::memory_view<T>(mat.step1(1), mat.ptr<T>(i,j)));
+    }
+
+    cv::Mat* createMat(int rows, int cols, int type, intptr_t data, size_t step)
+    {
+        return new cv::Mat(rows, cols, type, reinterpret_cast<void*>(data), step);
+    }
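+
+    // createMat wraps caller-owned memory instead of copying it. A sketch of the
+    // intended JS usage (assuming Emscripten's _malloc is exported on the module):
+    //     let ptr = cv._malloc(4 * 4);                  // 4x4 CV_8UC1 buffer
+    //     let m = new cv.Mat(4, 4, cv.CV_8UC1, ptr, 4); // step = 4 bytes/row
+    // The buffer must outlive the Mat and be freed by the caller.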
+
+    static emscripten::val getMatSize(const cv::Mat& mat)
+    {
+        emscripten::val size = emscripten::val::array();
+        for (int i = 0; i < mat.dims; i++) {
+            size.call<void>("push", mat.size[i]);
+        }
+        return size;
+    }
+
+    static emscripten::val getMatStep(const cv::Mat& mat)
+    {
+        emscripten::val step = emscripten::val::array();
+        for (int i = 0; i < mat.dims; i++) {
+            step.call<void>("push", mat.step[i]);
+        }
+        return step;
+    }
+
+    static Mat matEye(int rows, int cols, int type)
+    {
+        return Mat(cv::Mat::eye(rows, cols, type));
+    }
+
+    static Mat matEye(Size size, int type)
+    {
+        return Mat(cv::Mat::eye(size, type));
+    }
+
+    void convertTo(const Mat& obj, Mat& m, int rtype, double alpha, double beta)
+    {
+        obj.convertTo(m, rtype, alpha, beta);
+    }
+
+    void convertTo(const Mat& obj, Mat& m, int rtype)
+    {
+        obj.convertTo(m, rtype);
+    }
+
+    void convertTo(const Mat& obj, Mat& m, int rtype, double alpha)
+    {
+        obj.convertTo(m, rtype, alpha);
+    }
+
+    Size matSize(const cv::Mat& mat)
+    {
+        return mat.size();
+    }
+
+    cv::Mat matZeros(int arg0, int arg1, int arg2)
+    {
+        return cv::Mat::zeros(arg0, arg1, arg2);
+    }
+
+    cv::Mat matZeros(cv::Size arg0, int arg1)
+    {
+        return cv::Mat::zeros(arg0,arg1);
+    }
+
+    cv::Mat matOnes(int arg0, int arg1, int arg2)
+    {
+        return cv::Mat::ones(arg0, arg1, arg2);
+    }
+
+    cv::Mat matOnes(cv::Size arg0, int arg1)
+    {
+        return cv::Mat::ones(arg0, arg1);
+    }
+
+    double matDot(const cv::Mat& obj, const Mat& mat)
+    {
+        return obj.dot(mat);
+    }
+
+    Mat matMul(const cv::Mat& obj, const Mat& mat, double scale)
+    {
+        return Mat(obj.mul(mat, scale));
+    }
+
+    Mat matT(const cv::Mat& obj)
+    {
+        return Mat(obj.t());
+    }
+
+    Mat matInv(const cv::Mat& obj, int type)
+    {
+        return Mat(obj.inv(type));
+    }
+
+    void matCopyTo(const cv::Mat& obj, cv::Mat& mat)
+    {
+        obj.copyTo(mat);
+    }
+
+    void matCopyTo(const cv::Mat& obj, cv::Mat& mat, const cv::Mat& mask)
+    {
+        obj.copyTo(mat, mask);
+    }
+
+    Mat matDiag(const cv::Mat& obj, int d)
+    {
+        return obj.diag(d);
+    }
+
+    Mat matDiag(const cv::Mat& obj)
+    {
+        return obj.diag();
+    }
+
+    void matSetTo(cv::Mat& obj, const cv::Scalar& s)
+    {
+        obj.setTo(s);
+    }
+
+    void matSetTo(cv::Mat& obj, const cv::Scalar& s, const cv::Mat& mask)
+    {
+        obj.setTo(s, mask);
+    }
+
+    emscripten::val rotatedRectPoints(const cv::RotatedRect& obj)
+    {
+        cv::Point2f points[4];
+        obj.points(points);
+        emscripten::val pointsArray = emscripten::val::array();
+        for (int i = 0; i < 4; i++) {
+            pointsArray.call<void>("push", points[i]);
+        }
+        return pointsArray;
+    }
+
+    Rect rotatedRectBoundingRect(const cv::RotatedRect& obj)
+    {
+        return obj.boundingRect();
+    }
+
+    Rect2f rotatedRectBoundingRect2f(const cv::RotatedRect& obj)
+    {
+        return obj.boundingRect2f();
+    }
+
+    int cvMatDepth(int flags)
+    {
+        return CV_MAT_DEPTH(flags);
+    }
+
+    class MinMaxLoc
+    {
+    public:
+        double minVal;
+        double maxVal;
+        Point minLoc;
+        Point maxLoc;
+    };
+
+    MinMaxLoc minMaxLoc(const cv::Mat& src, const cv::Mat& mask)
+    {
+        MinMaxLoc result;
+        cv::minMaxLoc(src, &result.minVal, &result.maxVal, &result.minLoc, &result.maxLoc, mask);
+        return result;
+    }
+
+    MinMaxLoc minMaxLoc_1(const cv::Mat& src)
+    {
+        MinMaxLoc result;
+        cv::minMaxLoc(src, &result.minVal, &result.maxVal, &result.minLoc, &result.maxLoc);
+        return result;
+    }
+
+    class Circle
+    {
+    public:
+        Point2f center;
+        float radius;
+    };
+
+    Circle minEnclosingCircle(const cv::Mat& points)
+    {
+        Circle circle;
+        cv::minEnclosingCircle(points, circle.center, circle.radius);
+        return circle;
+    }
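+
+    // Embind cannot bind cv::minEnclosingCircle's float& out-parameter, so the
+    // wrapper returns a Circle value object; from JS:
+    //     let c = cv.minEnclosingCircle(points);  // c.center.{x,y}, c.radius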
+
+    emscripten::val CamShiftWrapper(const cv::Mat& arg1, Rect& arg2, TermCriteria arg3)
+    {
+        RotatedRect rotatedRect = cv::CamShift(arg1, arg2, arg3);
+        emscripten::val result = emscripten::val::array();
+        result.call<void>("push", rotatedRect);
+        result.call<void>("push", arg2);
+        return result;
+    }
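+
+    // Rect& is an in/out parameter embind cannot express, so the wrapper returns
+    // a two-element JS array instead; a typical call looks like
+    //     let [rotatedRect, trackWindow] = cv.CamShift(probImage, window, criteria);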
+
+    emscripten::val meanShiftWrapper(const cv::Mat& arg1, Rect& arg2, TermCriteria arg3)
+    {
+        int n = cv::meanShift(arg1, arg2, arg3);
+        emscripten::val result = emscripten::val::array();
+        result.call<void>("push", n);
+        result.call<void>("push", arg2);
+        return result;
+    }
+
+    std::string getExceptionMsg(const cv::Exception& e) {
+        return e.msg;
+    }
+
+    void setExceptionMsg(cv::Exception& e, std::string msg) {
+        e.msg = msg;
+    }
+
+    cv::Exception exceptionFromPtr(intptr_t ptr) {
+        return *reinterpret_cast<cv::Exception*>(ptr);
+    }
+
+    std::string getBuildInformation() {
+        return cv::getBuildInformation();
+    }
+}
+
+EMSCRIPTEN_BINDINGS(binding_utils)
+{
+    register_vector<int>("IntVector");
+    register_vector<float>("FloatVector");
+    register_vector<double>("DoubleVector");
+    register_vector<cv::Point>("PointVector");
+    register_vector<cv::Mat>("MatVector");
+    register_vector<cv::Rect>("RectVector");
+
+    emscripten::class_<cv::Mat>("Mat")
+        .constructor<>()
+        .constructor<const Mat&>()
+        .constructor<Size, int>()
+        .constructor<int, int, int>()
+        .constructor<int, int, int, const Scalar&>()
+        .constructor(&binding_utils::createMat, allow_raw_pointers())
+
+        .class_function("eye", select_overload<Mat(int, int, int)>(&binding_utils::matEye))
+        .class_function("eye", select_overload<Mat(Size, int)>(&binding_utils::matEye))
+        .class_function("ones", select_overload<Mat(int, int, int)>(&binding_utils::matOnes))
+        .class_function("ones", select_overload<Mat(Size, int)>(&binding_utils::matOnes))
+        .class_function("zeros", select_overload<Mat(int, int, int)>(&binding_utils::matZeros))
+        .class_function("zeros", select_overload<Mat(Size, int)>(&binding_utils::matZeros))
+
+        .property("rows", &cv::Mat::rows)
+        .property("cols", &cv::Mat::cols)
+        .property("matSize", &binding_utils::getMatSize)
+        .property("step", &binding_utils::getMatStep)
+        .property("data", &binding_utils::matData<unsigned char>)
+        .property("data8S", &binding_utils::matData<char>)
+        .property("data16U", &binding_utils::matData<unsigned short>)
+        .property("data16S", &binding_utils::matData<short>)
+        .property("data32S", &binding_utils::matData<int>)
+        .property("data32F", &binding_utils::matData<float>)
+        .property("data64F", &binding_utils::matData<double>)
+
+        .function("elemSize", select_overload<size_t()const>(&cv::Mat::elemSize))
+        .function("elemSize1", select_overload<size_t()const>(&cv::Mat::elemSize1))
+        .function("channels", select_overload<int()const>(&cv::Mat::channels))
+        .function("convertTo", select_overload<void(const Mat&, Mat&, int, double, double)>(&binding_utils::convertTo))
+        .function("convertTo", select_overload<void(const Mat&, Mat&, int)>(&binding_utils::convertTo))
+        .function("convertTo", select_overload<void(const Mat&, Mat&, int, double)>(&binding_utils::convertTo))
+        .function("total", select_overload<size_t()const>(&cv::Mat::total))
+        .function("row", select_overload<Mat(int)const>(&cv::Mat::row))
+        .function("create", select_overload<void(int, int, int)>(&cv::Mat::create))
+        .function("create", select_overload<void(Size, int)>(&cv::Mat::create))
+        .function("rowRange", select_overload<Mat(int, int)const>(&cv::Mat::rowRange))
+        .function("rowRange", select_overload<Mat(const Range&)const>(&cv::Mat::rowRange))
+        .function("copyTo", select_overload<void(const Mat&, Mat&)>(&binding_utils::matCopyTo))
+        .function("copyTo", select_overload<void(const Mat&, Mat&, const Mat&)>(&binding_utils::matCopyTo))
+        .function("type", select_overload<int()const>(&cv::Mat::type))
+        .function("empty", select_overload<bool()const>(&cv::Mat::empty))
+        .function("colRange", select_overload<Mat(int, int)const>(&cv::Mat::colRange))
+        .function("colRange", select_overload<Mat(const Range&)const>(&cv::Mat::colRange))
+        .function("step1", select_overload<size_t(int)const>(&cv::Mat::step1))
+        .function("clone", select_overload<Mat()const>(&cv::Mat::clone))
+        .function("depth", select_overload<int()const>(&cv::Mat::depth))
+        .function("col", select_overload<Mat(int)const>(&cv::Mat::col))
+        .function("dot", select_overload<double(const Mat&, const Mat&)>(&binding_utils::matDot))
+        .function("mul", select_overload<Mat(const Mat&, const Mat&, double)>(&binding_utils::matMul))
+        .function("inv", select_overload<Mat(const Mat&, int)>(&binding_utils::matInv))
+        .function("t", select_overload<Mat(const Mat&)>(&binding_utils::matT))
+        .function("roi", select_overload<Mat(const Rect&)const>(&cv::Mat::operator()))
+        .function("diag", select_overload<Mat(const Mat&, int)>(&binding_utils::matDiag))
+        .function("diag", select_overload<Mat(const Mat&)>(&binding_utils::matDiag))
+        .function("isContinuous", select_overload<bool()const>(&cv::Mat::isContinuous))
+        .function("setTo", select_overload<void(Mat&, const Scalar&)>(&binding_utils::matSetTo))
+        .function("setTo", select_overload<void(Mat&, const Scalar&, const Mat&)>(&binding_utils::matSetTo))
+        .function("size", select_overload<Size(const Mat&)>(&binding_utils::matSize))
+
+        .function("ptr", select_overload<val(const Mat&, int)>(&binding_utils::matPtr<unsigned char>))
+        .function("ptr", select_overload<val(const Mat&, int, int)>(&binding_utils::matPtr<unsigned char>))
+        .function("ucharPtr", select_overload<val(const Mat&, int)>(&binding_utils::matPtr<unsigned char>))
+        .function("ucharPtr", select_overload<val(const Mat&, int, int)>(&binding_utils::matPtr<unsigned char>))
+        .function("charPtr", select_overload<val(const Mat&, int)>(&binding_utils::matPtr<char>))
+        .function("charPtr", select_overload<val(const Mat&, int, int)>(&binding_utils::matPtr<char>))
+        .function("shortPtr", select_overload<val(const Mat&, int)>(&binding_utils::matPtr<short>))
+        .function("shortPtr", select_overload<val(const Mat&, int, int)>(&binding_utils::matPtr<short>))
+        .function("ushortPtr", select_overload<val(const Mat&, int)>(&binding_utils::matPtr<unsigned short>))
+        .function("ushortPtr", select_overload<val(const Mat&, int, int)>(&binding_utils::matPtr<unsigned short>))
+        .function("intPtr", select_overload<val(const Mat&, int)>(&binding_utils::matPtr<int>))
+        .function("intPtr", select_overload<val(const Mat&, int, int)>(&binding_utils::matPtr<int>))
+        .function("floatPtr", select_overload<val(const Mat&, int)>(&binding_utils::matPtr<float>))
+        .function("floatPtr", select_overload<val(const Mat&, int, int)>(&binding_utils::matPtr<float>))
+        .function("doublePtr", select_overload<val(const Mat&, int)>(&binding_utils::matPtr<double>))
+        .function("doublePtr", select_overload<val(const Mat&, int, int)>(&binding_utils::matPtr<double>))
+
+        .function("charAt", select_overload<char&(int)>(&cv::Mat::at<char>))
+        .function("charAt", select_overload<char&(int, int)>(&cv::Mat::at<char>))
+        .function("charAt", select_overload<char&(int, int, int)>(&cv::Mat::at<char>))
+        .function("ucharAt", select_overload<unsigned char&(int)>(&cv::Mat::at<unsigned char>))
+        .function("ucharAt", select_overload<unsigned char&(int, int)>(&cv::Mat::at<unsigned char>))
+        .function("ucharAt", select_overload<unsigned char&(int, int, int)>(&cv::Mat::at<unsigned char>))
+        .function("shortAt", select_overload<short&(int)>(&cv::Mat::at<short>))
+        .function("shortAt", select_overload<short&(int, int)>(&cv::Mat::at<short>))
+        .function("shortAt", select_overload<short&(int, int, int)>(&cv::Mat::at<short>))
+        .function("ushortAt", select_overload<unsigned short&(int)>(&cv::Mat::at<unsigned short>))
+        .function("ushortAt", select_overload<unsigned short&(int, int)>(&cv::Mat::at<unsigned short>))
+        .function("ushortAt", select_overload<unsigned short&(int, int, int)>(&cv::Mat::at<unsigned short>))
+        .function("intAt", select_overload<int&(int)>(&cv::Mat::at<int>) )
+        .function("intAt", select_overload<int&(int, int)>(&cv::Mat::at<int>) )
+        .function("intAt", select_overload<int&(int, int, int)>(&cv::Mat::at<int>) )
+        .function("floatAt", select_overload<float&(int)>(&cv::Mat::at<float>))
+        .function("floatAt", select_overload<float&(int, int)>(&cv::Mat::at<float>))
+        .function("floatAt", select_overload<float&(int, int, int)>(&cv::Mat::at<float>))
+        .function("doubleAt", select_overload<double&(int, int, int)>(&cv::Mat::at<double>))
+        .function("doubleAt", select_overload<double&(int)>(&cv::Mat::at<double>))
+        .function("doubleAt", select_overload<double&(int, int)>(&cv::Mat::at<double>));
+
+    emscripten::value_object<cv::Range>("Range")
+        .field("start", &cv::Range::start)
+        .field("end", &cv::Range::end);
+
+    emscripten::value_object<cv::TermCriteria>("TermCriteria")
+        .field("type", &cv::TermCriteria::type)
+        .field("maxCount", &cv::TermCriteria::maxCount)
+        .field("epsilon", &cv::TermCriteria::epsilon);
+
+#define EMSCRIPTEN_CV_SIZE(type) \
+    emscripten::value_object<type>(#type) \
+        .field("width", &type::width) \
+        .field("height", &type::height);
+
+    EMSCRIPTEN_CV_SIZE(Size)
+    EMSCRIPTEN_CV_SIZE(Size2f)
+
+#define EMSCRIPTEN_CV_POINT(type) \
+    emscripten::value_object<type>(#type) \
+        .field("x", &type::x) \
+        .field("y", &type::y);
+
+    EMSCRIPTEN_CV_POINT(Point)
+    EMSCRIPTEN_CV_POINT(Point2f)
+
+#define EMSCRIPTEN_CV_RECT(type, name) \
+    emscripten::value_object<cv::Rect_<type>>(name) \
+        .field("x", &cv::Rect_<type>::x) \
+        .field("y", &cv::Rect_<type>::y) \
+        .field("width", &cv::Rect_<type>::width) \
+        .field("height", &cv::Rect_<type>::height);
+
+    EMSCRIPTEN_CV_RECT(int, "Rect")
+    EMSCRIPTEN_CV_RECT(float, "Rect2f")
+
+    emscripten::value_object<cv::RotatedRect>("RotatedRect")
+        .field("center", &cv::RotatedRect::center)
+        .field("size", &cv::RotatedRect::size)
+        .field("angle", &cv::RotatedRect::angle);
+
+    function("rotatedRectPoints", select_overload<emscripten::val(const cv::RotatedRect&)>(&binding_utils::rotatedRectPoints));
+    function("rotatedRectBoundingRect", select_overload<Rect(const cv::RotatedRect&)>(&binding_utils::rotatedRectBoundingRect));
+    function("rotatedRectBoundingRect2f", select_overload<Rect2f(const cv::RotatedRect&)>(&binding_utils::rotatedRectBoundingRect2f));
+
+    emscripten::value_array<cv::Scalar_<double>> ("Scalar")
+        .element(index<0>())
+        .element(index<1>())
+        .element(index<2>())
+        .element(index<3>());
+
+    emscripten::value_object<binding_utils::MinMaxLoc>("MinMaxLoc")
+        .field("minVal", &binding_utils::MinMaxLoc::minVal)
+        .field("maxVal", &binding_utils::MinMaxLoc::maxVal)
+        .field("minLoc", &binding_utils::MinMaxLoc::minLoc)
+        .field("maxLoc", &binding_utils::MinMaxLoc::maxLoc);
+
+    emscripten::value_object<binding_utils::Circle>("Circle")
+        .field("center", &binding_utils::Circle::center)
+        .field("radius", &binding_utils::Circle::radius);
+
+    emscripten::value_object<cv::Moments>("Moments")
+        .field("m00", &cv::Moments::m00)
+        .field("m10", &cv::Moments::m10)
+        .field("m01", &cv::Moments::m01)
+        .field("m20", &cv::Moments::m20)
+        .field("m11", &cv::Moments::m11)
+        .field("m02", &cv::Moments::m02)
+        .field("m30", &cv::Moments::m30)
+        .field("m21", &cv::Moments::m21)
+        .field("m12", &cv::Moments::m12)
+        .field("m03", &cv::Moments::m03)
+        .field("mu20", &cv::Moments::mu20)
+        .field("mu11", &cv::Moments::mu11)
+        .field("mu02", &cv::Moments::mu02)
+        .field("mu30", &cv::Moments::mu30)
+        .field("mu21", &cv::Moments::mu21)
+        .field("mu12", &cv::Moments::mu12)
+        .field("mu03", &cv::Moments::mu03)
+        .field("nu20", &cv::Moments::nu20)
+        .field("nu11", &cv::Moments::nu11)
+        .field("nu02", &cv::Moments::nu02)
+        .field("nu30", &cv::Moments::nu30)
+        .field("nu21", &cv::Moments::nu21)
+        .field("nu12", &cv::Moments::nu12)
+        .field("nu03", &cv::Moments::nu03);
+
+    emscripten::value_object<cv::Exception>("Exception")
+        .field("code", &cv::Exception::code)
+        .field("msg", &binding_utils::getExceptionMsg, &binding_utils::setExceptionMsg);
+
+    function("exceptionFromPtr", &binding_utils::exceptionFromPtr, allow_raw_pointers());
+
+    function("minEnclosingCircle", select_overload<binding_utils::Circle(const cv::Mat&)>(&binding_utils::minEnclosingCircle));
+
+    function("minMaxLoc", select_overload<binding_utils::MinMaxLoc(const cv::Mat&, const cv::Mat&)>(&binding_utils::minMaxLoc));
+
+    function("minMaxLoc", select_overload<binding_utils::MinMaxLoc(const cv::Mat&)>(&binding_utils::minMaxLoc_1));
+
+    function("morphologyDefaultBorderValue", &cv::morphologyDefaultBorderValue);
+
+    function("CV_MAT_DEPTH", &binding_utils::cvMatDepth);
+
+    function("CamShift", select_overload<emscripten::val(const cv::Mat&, Rect&, TermCriteria)>(&binding_utils::CamShiftWrapper));
+
+    function("meanShift", select_overload<emscripten::val(const cv::Mat&, Rect&, TermCriteria)>(&binding_utils::meanShiftWrapper));
+
+    function("getBuildInformation", &binding_utils::getBuildInformation);
+
+    constant("CV_8UC1", CV_8UC1);
+    constant("CV_8UC2", CV_8UC2);
+    constant("CV_8UC3", CV_8UC3);
+    constant("CV_8UC4", CV_8UC4);
+
+    constant("CV_8SC1", CV_8SC1);
+    constant("CV_8SC2", CV_8SC2);
+    constant("CV_8SC3", CV_8SC3);
+    constant("CV_8SC4", CV_8SC4);
+
+    constant("CV_16UC1", CV_16UC1);
+    constant("CV_16UC2", CV_16UC2);
+    constant("CV_16UC3", CV_16UC3);
+    constant("CV_16UC4", CV_16UC4);
+
+    constant("CV_16SC1", CV_16SC1);
+    constant("CV_16SC2", CV_16SC2);
+    constant("CV_16SC3", CV_16SC3);
+    constant("CV_16SC4", CV_16SC4);
+
+    constant("CV_32SC1", CV_32SC1);
+    constant("CV_32SC2", CV_32SC2);
+    constant("CV_32SC3", CV_32SC3);
+    constant("CV_32SC4", CV_32SC4);
+
+    constant("CV_32FC1", CV_32FC1);
+    constant("CV_32FC2", CV_32FC2);
+    constant("CV_32FC3", CV_32FC3);
+    constant("CV_32FC4", CV_32FC4);
+
+    constant("CV_64FC1", CV_64FC1);
+    constant("CV_64FC2", CV_64FC2);
+    constant("CV_64FC3", CV_64FC3);
+    constant("CV_64FC4", CV_64FC4);
+
+    constant("CV_8U", CV_8U);
+    constant("CV_8S", CV_8S);
+    constant("CV_16U", CV_16U);
+    constant("CV_16S", CV_16S);
+    constant("CV_32S",  CV_32S);
+    constant("CV_32F", CV_32F);
+    constant("CV_64F", CV_64F);
+
+    constant("INT_MIN", INT_MIN);
+    constant("INT_MAX", INT_MAX);
+}
diff --git a/modules/js/src/embindgen.py b/modules/js/src/embindgen.py
new file mode 100644 (file)
index 0000000..124ab90
--- /dev/null
@@ -0,0 +1,920 @@
+###############################################################################
+#
+#  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+#
+#  By downloading, copying, installing or using the software you agree to this license.
+#  If you do not agree to this license, do not download, install,
+#  copy or use the software.
+#
+#
+#                           License Agreement
+#                For Open Source Computer Vision Library
+#
+# Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+# Third party copyrights are property of their respective owners.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+#   * Redistribution's of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#   * Redistribution's in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+#   * The name of the copyright holders may not be used to endorse or promote products
+#     derived from this software without specific prior written permission.
+#
+# This software is provided by the copyright holders and contributors "as is" and
+# any express or implied warranties, including, but not limited to, the implied
+# warranties of merchantability and fitness for a particular purpose are disclaimed.
+# In no event shall the Intel Corporation or contributors be liable for any direct,
+# indirect, incidental, special, exemplary, or consequential damages
+# (including, but not limited to, procurement of substitute goods or services;
+# loss of use, data, or profits; or business interruption) however caused
+# and on any theory of liability, whether in contract, strict liability,
+# or tort (including negligence or otherwise) arising in any way out of
+# the use of this software, even if advised of the possibility of such damage.
+#
+
+###############################################################################
+# AUTHOR: Sajjad Taheri, University of California, Irvine. sajjadt[at]uci[dot]edu
+#
+#                             LICENSE AGREEMENT
+# Copyright (c) 2015 The Regents of the University of California (Regents)
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. Neither the name of the University nor the
+#    names of its contributors may be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+###############################################################################
+
+from __future__ import print_function
+import sys, re, os
+from templates import *
+
+if sys.version_info[0] >= 3:
+    from io import StringIO
+else:
+    from cStringIO import StringIO
+
+
+func_table = {}
+
+# Ignore these functions due to Embind limitations for now
+ignore_list = ['locate',  #int&
+               'minEnclosingCircle',  #float&
+               'checkRange',
+               'minMaxLoc',   #double*
+               'floodFill',
+               'phaseCorrelate',
+               'randShuffle',
+               'calibrationMatrixValues', #double&
+               'undistortPoints', # global redefinition
+               'CamShift', #Rect&
+               'meanShift' #Rect&
+               ]
+
+# Classes and methods whitelist
+core = {'': ['absdiff', 'add', 'addWeighted', 'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'cartToPolar',\
+             'compare', 'convertScaleAbs', 'copyMakeBorder', 'countNonZero', 'determinant', 'dft', 'divide', 'eigen', \
+             'exp', 'flip', 'getOptimalDFTSize','gemm', 'hconcat', 'inRange', 'invert', 'kmeans', 'log', 'magnitude', \
+             'max', 'mean', 'meanStdDev', 'merge', 'min', 'minMaxLoc', 'mixChannels', 'multiply', 'norm', 'normalize', \
+             'perspectiveTransform', 'polarToCart', 'pow', 'randn', 'randu', 'reduce', 'repeat', 'setIdentity', 'setRNGSeed', \
+             'solve', 'solvePoly', 'split', 'sqrt', 'subtract', 'trace', 'transform', 'transpose', 'vconcat'],
+        'Algorithm': []}
+
+imgproc = {'': ['Canny', 'GaussianBlur', 'Laplacian', 'HoughLines', 'HoughLinesP', 'HoughCircles', 'Scharr','Sobel', \
+                'adaptiveThreshold','approxPolyDP','arcLength','bilateralFilter','blur','boundingRect','boxFilter',\
+                'calcBackProject','calcHist','circle','compareHist','connectedComponents','connectedComponentsWithStats', \
+                'contourArea', 'convexHull', 'convexityDefects', 'cornerHarris','cornerMinEigenVal','createCLAHE', \
+                'createLineSegmentDetector','cvtColor','demosaicing','dilate', 'distanceTransform','distanceTransformWithLabels', \
+                'drawContours','ellipse','ellipse2Poly','equalizeHist','erode', 'filter2D', 'findContours','fitEllipse', \
+                'fitLine', 'floodFill','getAffineTransform', 'getPerspectiveTransform', 'getRotationMatrix2D', 'getStructuringElement', \
+                'goodFeaturesToTrack','grabCut','initUndistortRectifyMap', 'integral','integral2', 'isContourConvex', 'line', \
+                'matchShapes', 'matchTemplate','medianBlur', 'minAreaRect', 'minEnclosingCircle', 'moments', 'morphologyEx', \
+                'pointPolygonTest', 'putText','pyrDown','pyrUp','rectangle','remap', 'resize','sepFilter2D','threshold', \
+                'undistort','warpAffine','warpPerspective','watershed'],
+           'CLAHE': ['apply', 'collectGarbage', 'getClipLimit', 'getTilesGridSize', 'setClipLimit', 'setTilesGridSize']}
+
+objdetect = {'': ['groupRectangles'],
+             'HOGDescriptor': ['load', 'HOGDescriptor', 'getDefaultPeopleDetector', 'getDaimlerPeopleDetector', 'setSVMDetector', 'detectMultiScale'],
+             'CascadeClassifier': ['load', 'detectMultiScale2', 'CascadeClassifier', 'detectMultiScale3', 'empty', 'detectMultiScale']}
+
+video = {'': ['CamShift', 'calcOpticalFlowFarneback', 'calcOpticalFlowPyrLK', 'createBackgroundSubtractorMOG2', 'estimateRigidTransform',\
+             'findTransformECC', 'meanShift'],
+         'BackgroundSubtractorMOG2': ['BackgroundSubtractorMOG2', 'apply'],
+         'BackgroundSubtractor': ['apply', 'getBackgroundImage']}
+
+def makeWhiteList(module_list):
+    wl = {}
+    for m in module_list:
+        for k in m.keys():
+            if k in wl:
+                wl[k] += m[k]
+            else:
+                wl[k] = m[k]
+    return wl
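+
+# For example, merging two module tables concatenates the method lists of any
+# shared class key:
+#   makeWhiteList([{'': ['add']}, {'': ['dft'], 'CLAHE': ['apply']}])
+#   => {'': ['add', 'dft'], 'CLAHE': ['apply']}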
+
+white_list = makeWhiteList([core, imgproc, objdetect, video])
+
+# Features to be exported
+export_enums = False
+export_consts = True
+with_wrapped_functions = True
+with_default_params = True
+with_vec_from_js_array = True
+
+wrapper_namespace = "Wrappers"
+type_dict = {
+    'InputArray': 'const cv::Mat&',
+    'OutputArray': 'cv::Mat&',
+    'InputOutputArray': 'cv::Mat&',
+    'InputArrayOfArrays': 'const std::vector<cv::Mat>&',
+    'OutputArrayOfArrays': 'std::vector<cv::Mat>&',
+    'String': 'std::string',
+    'const String&':'const std::string&'
+}
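+
+# Applied to a parsed declaration, these rewrites turn header-level proxy types
+# into concrete binding types; e.g. a signature like
+#   void blur(InputArray src, OutputArray dst, Size ksize)   # defaults elided
+# is emitted in the generated wrapper as
+#   void blur(const cv::Mat& src, cv::Mat& dst, Size ksize)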
+
+def normalize_class_name(name):
+    return re.sub(r"^cv\.", "", name).replace(".", "_")
+
+
+class ClassProp(object):
+    def __init__(self, decl):
+        self.tp = decl[0].replace("*", "_ptr").strip()
+        self.name = decl[1]
+        self.readonly = True
+        if "/RW" in decl[3]:
+            self.readonly = False
+
+
+class ClassInfo(object):
+    def __init__(self, name, decl=None):
+        self.cname = name.replace(".", "::")
+        self.name = self.wname = normalize_class_name(name)
+
+        self.ismap = False
+        self.issimple = False
+        self.isalgorithm = False
+        self.methods = {}
+        self.ext_constructors = {}
+        self.props = []
+        self.consts = {}
+        customname = False
+        self.jsfuncs = {}
+        self.constructor_arg_num = set()
+
+        self.has_smart_ptr = False
+
+        if decl:
+            self.bases = decl[1].split()[1:]
+            if len(self.bases) > 1:
+                self.bases = [self.bases[0].strip(",")]
+                # return sys.exit(-1)
+            if self.bases and self.bases[0].startswith("cv::"):
+                self.bases[0] = self.bases[0][4:]
+            if self.bases and self.bases[0] == "Algorithm":
+                self.isalgorithm = True
+            for m in decl[2]:
+                if m.startswith("="):
+                    self.wname = m[1:]
+                    customname = True
+                elif m == "/Map":
+                    self.ismap = True
+                elif m == "/Simple":
+                    self.issimple = True
+            self.props = [ClassProp(p) for p in decl[3]]
+
+        if not customname and self.wname.startswith("Cv"):
+            self.wname = self.wname[2:]
+
+
+def handle_ptr(tp):
+    if tp.startswith('Ptr_'):
+        tp = 'Ptr<' + "::".join(tp.split('_')[1:]) + '>'
+    return tp
+
+def handle_vector(tp):
+    if tp.startswith('vector_'):
+        tp = 'std::vector<' + "::".join(tp.split('_')[1:]) + '>'
+    return tp
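+
+# Examples of the two rewrites above:
+#   handle_ptr('Ptr_ml_SVM')    -> 'Ptr<ml::SVM>'   (remaining '_' become '::')
+#   handle_vector('vector_Mat') -> 'std::vector<Mat>'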
+
+
+class ArgInfo(object):
+    def __init__(self, arg_tuple):
+        self.tp = handle_ptr(arg_tuple[0]).strip()
+        self.name = arg_tuple[1]
+        self.defval = arg_tuple[2]
+        self.isarray = False
+        self.arraylen = 0
+        self.arraycvt = None
+        self.inputarg = True
+        self.outputarg = False
+        self.returnarg = False
+        self.const = False
+        self.reference = False
+        for m in arg_tuple[3]:
+            if m == "/O":
+                self.inputarg = False
+                self.outputarg = True
+                self.returnarg = True
+            elif m == "/IO":
+                self.inputarg = True
+                self.outputarg = True
+                self.returnarg = True
+            elif m.startswith("/A"):
+                self.isarray = True
+                self.arraylen = m[2:].strip()
+            elif m.startswith("/CA"):
+                self.isarray = True
+                self.arraycvt = m[2:].strip()
+            elif m == "/C":
+                self.const = True
+            elif m == "/Ref":
+                self.reference = True
+        if self.tp == "Mat":
+            if self.outputarg:
+                self.tp = "cv::Mat&"
+            elif self.inputarg:
+                self.tp = "const cv::Mat&"
+        if self.tp == "vector_Mat":
+            if self.outputarg:
+                self.tp = "std::vector<cv::Mat>&"
+            elif self.inputarg:
+                self.tp = "const std::vector<cv::Mat>&"
+        self.tp = handle_vector(self.tp).strip()
+        if self.const:
+            self.tp = "const " + self.tp
+        if self.reference:
+            self.tp = self.tp + "&"
+        self.py_inputarg = False
+        self.py_outputarg = False
+
+class FuncVariant(object):
+    def __init__(self, class_name, name, decl, is_constructor, is_class_method, is_const, is_virtual, is_pure_virtual, ref_return, const_return):
+        self.class_name = class_name
+        self.name = self.wname = name
+        self.is_constructor = is_constructor
+        self.is_class_method = is_class_method
+        self.is_const = is_const
+        self.is_virtual = is_virtual
+        self.is_pure_virtual = is_pure_virtual
+        self.refret = ref_return
+        self.constret = const_return
+        self.rettype = handle_vector(handle_ptr(decl[1]).strip()).strip()
+        if self.rettype == "void":
+            self.rettype = ""
+        self.args = []
+        self.array_counters = {}
+
+        for a in decl[3]:
+            ainfo = ArgInfo(a)
+            if ainfo.isarray and not ainfo.arraycvt:
+                c = ainfo.arraylen
+                c_arrlist = self.array_counters.get(c, [])
+                if c_arrlist:
+                    c_arrlist.append(ainfo.name)
+                else:
+                    self.array_counters[c] = [ainfo.name]
+            self.args.append(ainfo)
+
+
+class FuncInfo(object):
+    def __init__(self, class_name, name, cname, namespace, isconstructor):
+        self.class_name = class_name
+        self.name = name
+        self.cname = cname
+        self.namespace = namespace
+        self.variants = []
+        self.is_constructor = isconstructor
+
+    def add_variant(self, variant):
+        self.variants.append(variant)
+
+
+class Namespace(object):
+    def __init__(self):
+        self.funcs = {}
+        self.enums = {}
+        self.consts = {}
+
+
+class JSWrapperGenerator(object):
+    def __init__(self):
+
+        self.bindings = []
+        self.wrapper_funcs = []
+
+        self.classes = {}
+        self.namespaces = {}
+        self.enums = {}
+
+        self.parser = hdr_parser.CppHeaderParser()
+        self.class_idx = 0
+
+    def add_class(self, stype, name, decl):
+        class_info = ClassInfo(name, decl)
+        class_info.decl_idx = self.class_idx
+        self.class_idx += 1
+
+        if class_info.name in self.classes:
+            print("Generator error: class %s (cpp_name=%s) already exists" \
+                  % (class_info.name, class_info.cname))
+            sys.exit(-1)
+        self.classes[class_info.name] = class_info
+
+        if class_info.bases:
+            chunks = class_info.bases[0].split('::')
+            base = '_'.join(chunks)
+            while base not in self.classes and len(chunks) > 1:
+                del chunks[-2]
+                base = '_'.join(chunks)
+            if base not in self.classes:
+                print("Generator error: unable to resolve base %s for %s"
+                      % (class_info.bases[0], class_info.name))
+                sys.exit(-1)
+            else:
+                class_info.bases[0] = "::".join(chunks)
+                class_info.isalgorithm |= self.classes[base].isalgorithm
+
+    def split_decl_name(self, name):
+        chunks = name.split('.')
+        namespace = chunks[:-1]
+        classes = []
+        while namespace and '.'.join(namespace) not in self.parser.namespaces:
+            classes.insert(0, namespace.pop())
+        return namespace, classes, chunks[-1]
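+
+    # e.g. assuming 'cv' is the only namespace known to the parser:
+    #   split_decl_name('cv.CascadeClassifier.load')
+    #     -> (['cv'], ['CascadeClassifier'], 'load')
+    # i.e. the longest known namespace prefix, the remaining chunks as the class
+    # path, and the bare declaration name.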
+
+    def add_enum(self, decl):
+        name = decl[1]
+        namespace, classes, val = self.split_decl_name(name)
+        namespace = '.'.join(namespace)
+        val = '_'.join(classes + [name])
+        cname = name.replace('.', '::')
+        ns = self.namespaces.setdefault(namespace, Namespace())
+        if name in ns.enums:
+            print("Generator warning: constant %s (cname=%s) already exists" \
+                  % (name, cname))
+            # sys.exit(-1)
+        else:
+            ns.enums[name] = []
+        for item in decl[3]:
+            ns.enums[name].append(item)
+
+    def add_const(self, name, decl):
+        cname = name.replace('.','::')
+        namespace, classes, name = self.split_decl_name(name)
+        namespace = '.'.join(namespace)
+        name = '_'.join(classes+[name])
+        ns = self.namespaces.setdefault(namespace, Namespace())
+        if name in ns.consts:
+            print("Generator error: constant %s (cname=%s) already exists" \
+                % (name, cname))
+            sys.exit(-1)
+        ns.consts[name] = cname
+
+    def add_func(self, decl):
+        namespace, classes, barename = self.split_decl_name(decl[0])
+        cpp_name = "::".join(namespace + classes + [barename])
+        name = barename
+        class_name = ''
+        bare_class_name = ''
+        if classes:
+            class_name = normalize_class_name('.'.join(namespace + classes))
+            bare_class_name = classes[-1]
+        namespace = '.'.join(namespace)
+
+        is_constructor = name == bare_class_name
+        is_class_method = False
+        is_const_method = False
+        is_virtual_method = False
+        is_pure_virtual_method = False
+        const_return = False
+        ref_return = False
+
+        for m in decl[2]:
+            if m == "/S":
+                is_class_method = True
+            elif m == "/C":
+                is_const_method = True
+            elif m == "/V":
+                is_virtual_method = True
+            elif m == "/PV":
+                is_pure_virtual_method = True
+            elif m == "/Ref":
+                ref_return = True
+            elif m == "/CRet":
+                const_return = True
+            elif m.startswith("="):
+                name = m[1:]
+
+        if class_name:
+            cpp_name = barename
+            func_map = self.classes[class_name].methods
+        else:
+            func_map = self.namespaces.setdefault(namespace, Namespace()).funcs
+
+        func = func_map.setdefault(name, FuncInfo(class_name, name, cpp_name, namespace, is_constructor))
+
+        variant = FuncVariant(class_name, name, decl, is_constructor, is_class_method, is_const_method,
+                        is_virtual_method, is_pure_virtual_method, ref_return, const_return)
+        func.add_variant(variant)
+
+    def save(self, path, name, buf):
+        f = open(path + "/" + name, "wt")
+        f.write(buf.getvalue())
+        f.close()
+
+    def gen_function_binding_with_wrapper(self, func, class_info):
+
+        binding_text = None
+        wrapper_func_text = None
+
+        bindings = []
+        wrappers = []
+
+        for index, variant in enumerate(func.variants):
+
+            factory = False
+            if class_info and 'Ptr<' in variant.rettype:
+
+                factory = True
+                base_class_name = variant.rettype
+                base_class_name = base_class_name.replace("Ptr<","").replace(">","").strip()
+                if base_class_name in self.classes:
+                    self.classes[base_class_name].has_smart_ptr = True
+                else:
+                    print(base_class_name, 'not found in classes; registering smart pointer on', class_info.name, 'instead')
+                    self.classes[class_info.name].has_smart_ptr = True
+
+            def_args = []
+            has_def_param = False
+
+            # Return type
+            ret_type = 'void' if variant.rettype.strip() == '' else variant.rettype
+            if ret_type.startswith('Ptr'): #smart pointer
+                ptr_type = ret_type.replace('Ptr<', '').replace('>', '')
+                if ptr_type in type_dict:
+                    ret_type = type_dict[ptr_type]
+            for key in type_dict:
+                if key in ret_type:
+                    ret_type = ret_type.replace(key, type_dict[key])
+
+            arg_types = []
+            unwrapped_arg_types = []
+            for arg in variant.args:
+                arg_type = None
+                if arg.tp in type_dict:
+                    arg_type = type_dict[arg.tp]
+                else:
+                    arg_type = arg.tp
+                # Add default value
+                if with_default_params and arg.defval != '':
+                    def_args.append(arg.defval)
+                arg_types.append(arg_type)
+                unwrapped_arg_types.append(arg_type)
+
+            # Function attribute
+            func_attribs = ''
+            if '*' in ''.join(arg_types):
+                func_attribs += ', allow_raw_pointers()'
+
+            if variant.is_pure_virtual:
+                func_attribs += ', pure_virtual()'
+
+
+            # Wrapper function
+            wrap_func_name = (func.class_name+"_" if class_info != None else "") + func.name.split("::")[-1] + "_wrapper"
+            js_func_name = func.name
+
+            # TODO: Name functions based on wrap directives or on the argument list
+            if index > 0:
+                wrap_func_name += str(index)
+                js_func_name += str(index)
+
+            c_func_name = 'Wrappers::' + wrap_func_name
+
+            # Binding template
+            raw_arg_names = ['arg' + str(i + 1) for i in range(0, len(variant.args))]
+            arg_names = []
+            w_signature = []
+            casted_arg_types = []
+            for arg_type, arg_name in zip(arg_types, raw_arg_names):
+                casted_arg_name = arg_name
+                if with_vec_from_js_array:
+                    # Only support const vector reference as input parameter
+                    match = re.search(r'const std::vector<(.*)>&', arg_type)
+                    if match:
+                        type_in_vect = match.group(1)
+                        if type_in_vect != 'cv::Mat':
+                            casted_arg_name = 'emscripten::vecFromJSArray<' + type_in_vect + '>(' + arg_name + ')'
+                            arg_type = re.sub(r'std::vector<(.*)>', 'emscripten::val', arg_type)
+                w_signature.append(arg_type + ' ' + arg_name)
+                arg_names.append(casted_arg_name)
+                casted_arg_types.append(arg_type)
+
+            arg_types = casted_arg_types
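+
+            # For instance, a parameter typed 'const std::vector<int>&' is received
+            # from JS as an emscripten::val, and the wrapper rebuilds the vector at
+            # the call site via emscripten::vecFromJSArray<int>(arg1); cv::Mat
+            # vectors are exempt because MatVector is registered as a real vector
+            # type in core_bindings.cpp and marshals without a JS-array round trip.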
+
+            # Argument list, signature
+            arg_names_casted = [c if a == b else c + '.as<' + a + '>()' for a, b, c in
+                                zip(unwrapped_arg_types, arg_types, arg_names)]
+
+            # Add self object to the parameters
+            if class_info and not factory:
+                arg_types = [class_info.cname + '&'] + arg_types
+                w_signature = [class_info.cname + '& arg0 '] + w_signature
+
+            for j in range(0, len(def_args) + 1):
+                postfix = ''
+                if j > 0:
+                    postfix = '_' + str(j)
+
+                ###################################
+                # Wrapper
+                if factory: # TODO or static
+                    name = class_info.cname+'::' if variant.class_name else ""
+                    cpp_call_text = static_class_call_template.substitute(scope=name,
+                                                                   func=func.cname,
+                                                                   args=', '.join(arg_names[:len(arg_names)-j]))
+                elif class_info:
+                    cpp_call_text = class_call_template.substitute(obj='arg0',
+                                                                   func=func.cname,
+                                                                   args=', '.join(arg_names[:len(arg_names)-j]))
+                else:
+                    cpp_call_text = call_template.substitute(func=func.cname,
+                                                             args=', '.join(arg_names[:len(arg_names)-j]))
+
+
+                wrapper_func_text = wrapper_function_template.substitute(ret_val=ret_type,
+                                                                             func=wrap_func_name+postfix,
+                                                                             signature=', '.join(w_signature[:len(w_signature)-j]),
+                                                                             cpp_call=cpp_call_text,
+                                                                             const='' if variant.is_const else '')
+
+                ###################################
+                # Binding
+                if class_info:
+                    if factory:
+                        # print("Factory Function: ", c_func_name, len(variant.args) - j, class_info.name)
+                        if variant.is_pure_virtual:
+                            # FIXME: workaround for pure virtual in constructor
+                            # e.g. DescriptorMatcher_clone_wrapper
+                            continue
+                        # consider the default parameter variants
+                        args_num = len(variant.args) - j
+                        if args_num in class_info.constructor_arg_num:
+                            # FIXME: workaround for constructor overloads with the same argument count
+                            # e.g. DescriptorMatcher
+                            continue
+                        class_info.constructor_arg_num.add(args_num)
+                        binding_text = ctr_template.substitute(const='const' if variant.is_const else '',
+                                                           cpp_name=c_func_name+postfix,
+                                                           ret=ret_type,
+                                                           args=','.join(arg_types[:len(arg_types)-j]),
+                                                           optional=func_attribs)
+                    else:
+                        binding_template = overload_class_static_function_template if variant.is_class_method else \
+                            overload_class_function_template
+                        binding_text = binding_template.substitute(js_name=js_func_name,
+                                                           const='' if variant.is_const else '',
+                                                           cpp_name=c_func_name+postfix,
+                                                           ret=ret_type,
+                                                           args=','.join(arg_types[:len(arg_types)-j]),
+                                                           optional=func_attribs)
+                else:
+                    binding_text = overload_function_template.substitute(js_name=js_func_name,
+                                                       cpp_name=c_func_name+postfix,
+                                                       const='const' if variant.is_const else '',
+                                                       ret=ret_type,
+                                                       args=', '.join(arg_types[:len(arg_types)-j]),
+                                                       optional=func_attribs)
+
+                bindings.append(binding_text)
+                wrappers.append(wrapper_func_text)
+
+        return [bindings, wrappers]
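+
+    # To sketch the default-parameter expansion above: a whitelisted function like
+    #   void GaussianBlur(InputArray src, OutputArray dst, Size ksize,
+    #                     double sigmaX, double sigmaY = 0, int borderType = BORDER_DEFAULT)
+    # has two trailing defaults, so j runs 0..2 and three wrappers are emitted
+    # (GaussianBlur_wrapper, _wrapper_1, _wrapper_2), each dropping one more
+    # trailing argument; embind overloads all of them under the single JS name
+    # 'GaussianBlur'.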
+
+
+    def gen_function_binding(self, func, class_info):
+
+        if class_info is not None:
+            func_name = class_info.cname+'::'+func.cname
+        else:
+            func_name = func.cname
+
+        binding_text = None
+        binding_text_list = []
+
+        for index, variant in enumerate(func.variants):
+            factory = False
+            #TODO if variant.is_class_method and variant.rettype == ('Ptr<' + class_info.name + '>'):
+            if (class_info is not None) and variant.rettype == ('Ptr<' + class_info.name + '>') or (func.name.startswith("create") and variant.rettype):
+                factory = True
+                base_class_name = variant.rettype
+                base_class_name = base_class_name.replace("Ptr<","").replace(">","").strip()
+                if base_class_name in self.classes:
+                    self.classes[base_class_name].has_smart_ptr = True
+                else:
+                    print(base_class_name, 'not found in classes; registering smart pointer on', class_info.name, 'instead')
+                    self.classes[class_info.name].has_smart_ptr = True
+
+
+            # Return type
+            ret_type = 'void' if variant.rettype.strip() == '' else variant.rettype
+
+            ret_type = ret_type.strip()
+
+            if ret_type.startswith('Ptr'): #smart pointer
+                ptr_type = ret_type.replace('Ptr<', '').replace('>', '')
+                if ptr_type in type_dict:
+                    ret_type = type_dict[ptr_type]
+            for key in type_dict:
+                if key in ret_type:
+                    ret_type = ret_type.replace(key, type_dict[key])
+
+            if variant.constret and not ret_type.startswith('const'):
+                ret_type = 'const ' + ret_type
+            if variant.refret and not ret_type.endswith('&'):
+                ret_type += '&'
+
+            arg_types = []
+            orig_arg_types = []
+            def_args = []
+            for arg in variant.args:
+                if arg.tp in type_dict:
+                    arg_type = type_dict[arg.tp]
+                else:
+                    arg_type = arg.tp
+
+                #if arg.outputarg:
+                #    arg_type += '&'
+                orig_arg_types.append(arg_type)
+                if with_default_params and arg.defval != '':
+                    def_args.append(arg.defval)
+                arg_types.append(orig_arg_types[-1])
+
+            # Function attribute
+            func_attribs = ''
+            if '*' in ''.join(orig_arg_types):
+                func_attribs += ', allow_raw_pointers()'
+
+            if variant.is_pure_virtual:
+                func_attribs += ', pure_virtual()'
+
+            #TODO better naming
+            #if variant.name in self.jsfunctions:
+            #else
+            js_func_name = variant.name
+
+
+            c_func_name = func.cname if (factory and not variant.is_class_method) else func_name
+
+
+            ################################### Binding
+            for j in range(0, len(def_args) + 1):
+                postfix = ''
+                if j > 0:
+                    postfix = '_' + str(j)
+                if factory:
+                    binding_text = ctr_template.substitute(const='const' if variant.is_const else '',
+                                                           cpp_name=c_func_name+postfix,
+                                                           ret=ret_type,
+                                                           args=','.join(arg_types[:len(arg_types)-j]),
+                                                           optional=func_attribs)
+                else:
+                    binding_template = overload_class_static_function_template if variant.is_class_method else \
+                            overload_function_template if class_info is None else overload_class_function_template
+                    binding_text = binding_template.substitute(js_name=js_func_name,
+                                                               const='const' if variant.is_const else '',
+                                                               cpp_name=c_func_name+postfix,
+                                                               ret=ret_type,
+                                                               args=','.join(arg_types[:len(arg_types)-j]),
+                                                               optional=func_attribs)
+
+                binding_text_list.append(binding_text)
+
+        return binding_text_list
+
+    def print_decls(self, decls):
+        """
+        Prints the list of declarations retrieved by the parse() method
+        """
+        for d in decls:
+            print(d[0], d[1], ";".join(d[2]))
+            for a in d[3]:
+                print("   ", a[0], a[1], a[2], end="")
+                if a[3]:
+                    print("; ".join(a[3]))
+                else:
+                    print()
+
+    def gen(self, dst_file, src_files, core_bindings):
+        # step 1: scan the headers and extract classes, enums and functions
+        for hdr in src_files:
+            decls = self.parser.parse(hdr)
+            # print(hdr);
+            # self.print_decls(decls);
+            if len(decls) == 0:
+                continue
+            for decl in decls:
+                name = decl[0]
+                type = name[:name.find(" ")]
+                if type == "struct" or type == "class":  # class/structure case
+                    name = name[name.find(" ") + 1:].strip()
+                    self.add_class(type, name, decl)
+                elif name.startswith("enum"):  # enumerations
+                    self.add_enum(decl)
+                elif name.startswith("const"):
+                    # constant
+                    self.add_const(name.replace("const ", "").strip(), decl)
+                else:  # class/global function
+                    self.add_func(decl)
+
+        # step 2: generate bindings
+        # Global functions
+        for ns_name, ns in sorted(self.namespaces.items()):
+            if ns_name.split('.')[0] != 'cv':
+                continue
+            for name, func in sorted(ns.funcs.items()):
+                if name in ignore_list:
+                    continue
+                if name not in white_list['']:
+                    continue
+
+                ext_cnst = False
+                # Check if the method is an external constructor
+                for variant in func.variants:
+                    if "Ptr<" in variant.rettype:
+
+                        # Register the smart pointer
+                        base_class_name = variant.rettype
+                        base_class_name = base_class_name.replace("Ptr<","").replace(">","").strip()
+                        self.classes[base_class_name].has_smart_ptr = True
+
+                        # Adds the external constructor
+                        class_name = func.name.replace("create", "")
+                        if class_name not in self.classes:
+                            self.classes[base_class_name].methods[func.cname] = func
+                        else:
+                            self.classes[class_name].methods[func.cname] = func
+                        ext_cnst = True
+                if ext_cnst:
+                    continue
+
+                if with_wrapped_functions:
+                    binding, wrapper = self.gen_function_binding_with_wrapper(func, class_info=None)
+                    self.bindings += binding
+                    self.wrapper_funcs += wrapper
+                else:
+                    binding = self.gen_function_binding(func, class_info=None)
+                    self.bindings+=binding
+
+        # step 3: generate code for the classes and their methods
+        class_list = list(self.classes.items())
+
+        for name, class_info in class_list:
+            class_bindings = []
+            if name not in white_list:
+                continue
+
+            # Generate bindings for methods
+            for method_name, method in class_info.methods.items():
+                if method.cname in ignore_list:
+                    continue
+                if method.name not in white_list[method.class_name]:
+                    continue
+                if method.is_constructor:
+                    for variant in method.variants:
+                        args = []
+                        for arg in variant.args:
+                            args.append(arg.tp)
+                        # print('Constructor: ', class_info.name, len(variant.args))
+                        args_num = len(variant.args)
+                        if args_num in class_info.constructor_arg_num:
+                            continue
+                        class_info.constructor_arg_num.add(args_num)
+                        class_bindings.append(constructor_template.substitute(signature=', '.join(args)))
+                else:
+                    if with_wrapped_functions and (len(method.variants) > 1 or len(method.variants[0].args) > 0 or "String" in method.variants[0].rettype):
+                        binding, wrapper = self.gen_function_binding_with_wrapper(method, class_info=class_info)
+                        self.wrapper_funcs += wrapper
+                        class_bindings += binding
+                    else:
+                        binding = self.gen_function_binding(method, class_info=class_info)
+                        class_bindings += binding
+
+            # Register smart pointer
+            if class_info.has_smart_ptr:
+                class_bindings.append(smart_ptr_reg_template.substitute(cname=class_info.cname, name=class_info.name))
+
+            # TODO: attach external constructors (see class_info.ext_constructors)
+
+            # Generate bindings for properties
+            for property in class_info.props:
+                class_bindings.append(class_property_template.substitute(js_name=property.name, cpp_name='::'.join(
+                    [class_info.cname, property.name])))
+
+            dv = ''
+            base = Template("""base<$base$isPoly>""")
+
+            assert len(class_info.bases) <= 1, "multiple inheritance not supported"
+
+            if len(class_info.bases) == 1:
+                dv = "," + base.substitute(base=', '.join(class_info.bases),
+                                           isPoly=", true" if class_info.name == "Feature2D" else "")
+
+            self.bindings.append(class_template.substitute(cpp_name=class_info.cname,
+                                                           js_name=name,
+                                                           class_templates=''.join(class_bindings),
+                                                           derivation=dv))
+
+        if export_enums:
+            # step 4: generate bindings for enums
+            # TODO anonymous enums are ignored for now.
+            for ns_name, ns in sorted(self.namespaces.items()):
+                if ns_name.split('.')[0] != 'cv':
+                    continue
+                for name, enum in sorted(ns.enums.items()):
+                    if not name.endswith('.anonymous'):
+                        name = name.replace("cv.", "")
+                        enum_values = []
+                        for enum_val in enum:
+                            value = enum_val[0][enum_val[0].rfind(".")+1:]
+                            enum_values.append(enum_item_template.substitute(val=value,
+                                                                             cpp_val=name.replace('.', '::')+'::'+value))
+
+                        self.bindings.append(enum_template.substitute(cpp_name=name.replace(".", "::"),
+                                                                      js_name=name.replace(".", "_"),
+                                                                      enum_items=''.join(enum_values)))
+                    else:
+                        print(name)
+                        #TODO: represent anonymous enums with constants
+
+        if export_consts:
+            # step 5: generate bindings for consts
+            for ns_name, ns in sorted(self.namespaces.items()):
+                if ns_name.split('.')[0] != 'cv':
+                    continue
+                for name, const in sorted(ns.consts.items()):
+                    # print("Gen consts: ", name, const)
+                    self.bindings.append(const_template.substitute(js_name=name, value=const))
+
+        with open(core_bindings) as f:
+            ret = f.read()
+
+        defis = '\n'.join(self.wrapper_funcs)
+        ret += wrapper_codes_template.substitute(ns=wrapper_namespace, defs=defis)
+        ret += emscripten_binding_template.substitute(binding_name='testBinding', bindings=''.join(self.bindings))
+
+        # print(ret)
+        with open(dst_file, "w") as text_file:
+            text_file.write(ret)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 5:
+        print("Usage:\n",
+              os.path.basename(sys.argv[0]),
+              "<full path to hdr_parser.py> <bindings.cpp> <headers.txt> <core_bindings.cpp>")
+        print("Current args are: ", ", ".join(["'" + a + "'" for a in sys.argv]))
+        sys.exit(1)
+
+    dstdir = "."
+    hdr_parser_path = os.path.abspath(sys.argv[1])
+    if hdr_parser_path.endswith(".py"):
+        hdr_parser_path = os.path.dirname(hdr_parser_path)
+    sys.path.append(hdr_parser_path)
+    import hdr_parser
+
+    bindingsCpp = sys.argv[2]
+    with open(sys.argv[3], 'r') as f:
+        headers = f.read().split(';')
+    coreBindings = sys.argv[4]
+    generator = JSWrapperGenerator()
+    generator.gen(bindingsCpp, headers, coreBindings)
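+
+# A minimal, illustrative invocation (the script name and paths below are
+# hypothetical and depend on the local checkout layout):
+#   python embindgen.py <opencv>/modules/python/src2/hdr_parser.py bindings.cpp headers.txt core_bindings.cpp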
diff --git a/modules/js/src/helpers.js b/modules/js/src/helpers.js
new file mode 100644 (file)
index 0000000..08d1a89
--- /dev/null
@@ -0,0 +1,399 @@
+// //////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+
+Module['imread'] = function(imageSource) {
+    var img = null;
+    if (typeof imageSource === 'string') {
+        img = document.getElementById(imageSource);
+    } else {
+        img = imageSource;
+    }
+    var canvas = null;
+    var ctx = null;
+    if (img instanceof HTMLImageElement) {
+        canvas = document.createElement('canvas');
+        canvas.width = img.width;
+        canvas.height = img.height;
+        ctx = canvas.getContext('2d');
+        ctx.drawImage(img, 0, 0, img.width, img.height);
+    } else if (img instanceof HTMLCanvasElement) {
+        canvas = img;
+        ctx = canvas.getContext('2d');
+    } else {
+        throw new Error('Please provide a valid canvas or img element or its id.');
+    }
+
+    var imgData = ctx.getImageData(0, 0, canvas.width, canvas.height);
+    return cv.matFromImageData(imgData);
+};
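+
+// A minimal usage sketch (assumes an <img id="srcImg"> element on the page):
+//   let mat = cv.imread('srcImg'); // RGBA data as cv.CV_8UC4
+//   ... process mat ...
+//   mat.delete();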
+
+Module['imshow'] = function(canvasSource, mat) {
+    var canvas = null;
+    if (typeof canvasSource === 'string') {
+        canvas = document.getElementById(canvasSource);
+    } else {
+        canvas = canvasSource;
+    }
+    if (!(canvas instanceof HTMLCanvasElement)) {
+        throw new Error('Please provide a valid canvas element or its id.');
+    }
+    if (!(mat instanceof cv.Mat)) {
+        throw new Error('Please provide a valid cv.Mat instance.');
+    }
+
+    // convert the mat type to cv.CV_8U
+    var img = new cv.Mat();
+    var depth = mat.type() % 8;
+    var scale = depth <= cv.CV_8S ? 1.0 : (depth <= cv.CV_32S ? 1.0 / 256.0 : 255.0);
+    var shift = (depth === cv.CV_8S || depth === cv.CV_16S) ? 128.0 : 0.0;
+    mat.convertTo(img, cv.CV_8U, scale, shift);
+
+    // convert the img type to cv.CV_8UC4
+    switch (img.type()) {
+        case cv.CV_8UC1:
+            cv.cvtColor(img, img, cv.COLOR_GRAY2RGBA);
+            break;
+        case cv.CV_8UC3:
+            cv.cvtColor(img, img, cv.COLOR_RGB2RGBA);
+            break;
+        case cv.CV_8UC4:
+            break;
+        default:
+            throw new Error('Bad number of channels (the source image must have 1, 3 or 4 channels)');
+    }
+    var imgData = new ImageData(new Uint8ClampedArray(img.data), img.cols, img.rows);
+    var ctx = canvas.getContext('2d');
+    ctx.clearRect(0, 0, canvas.width, canvas.height);
+    canvas.width = imgData.width;
+    canvas.height = imgData.height;
+    ctx.putImageData(imgData, 0, 0);
+    img.delete();
+};
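+
+// A minimal usage sketch (assumes a <canvas id="outCanvas"> element on the page):
+//   cv.imshow('outCanvas', mat); // mat is converted to 8-bit RGBA for display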
+
+Module['VideoCapture'] = function(videoSource) {
+    var video = null;
+    if (typeof videoSource === 'string') {
+        video = document.getElementById(videoSource);
+    } else {
+        video = videoSource;
+    }
+    if (!(video instanceof HTMLVideoElement)) {
+        throw new Error('Please provide a valid video element or its id.');
+    }
+    var canvas = document.createElement('canvas');
+    canvas.width = video.width;
+    canvas.height = video.height;
+    var ctx = canvas.getContext('2d');
+    this.video = video;
+    this.read = function(frame) {
+        if (!(frame instanceof cv.Mat)) {
+            throw new Error('Please provide a valid cv.Mat instance.');
+        }
+        if (frame.type() !== cv.CV_8UC4) {
+            throw new Error('Bad type of input mat: the type should be cv.CV_8UC4.');
+        }
+        if (frame.cols !== video.width || frame.rows !== video.height) {
+            throw new Error('Bad size of input mat: the size should be the same as the video.');
+        }
+        ctx.drawImage(video, 0, 0, video.width, video.height);
+        frame.data.set(ctx.getImageData(0, 0, video.width, video.height).data);
+    };
+};
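+
+// A minimal capture sketch (assumes a <video id="videoInput"> element whose
+// width/height attributes are set):
+//   let cap = new cv.VideoCapture('videoInput');
+//   let frame = new cv.Mat(cap.video.height, cap.video.width, cv.CV_8UC4);
+//   cap.read(frame); // copies the current video frame into frame
+//   frame.delete();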
+
+function Range(start, end) {
+    this.start = typeof(start) === 'undefined' ? 0 : start;
+    this.end = typeof(end) === 'undefined' ? 0 : end;
+}
+
+Module['Range'] = Range;
+
+function Point(x, y) {
+    this.x = typeof(x) === 'undefined' ? 0 : x;
+    this.y = typeof(y) === 'undefined' ? 0 : y;
+}
+
+Module['Point'] = Point;
+
+function Size(width, height) {
+    this.width = typeof(width) === 'undefined' ? 0 : width;
+    this.height = typeof(height) === 'undefined' ? 0 : height;
+}
+
+Module['Size'] = Size;
+
+function Rect() {
+    switch (arguments.length) {
+        case 0: {
+            // new cv.Rect()
+            this.x = 0;
+            this.y = 0;
+            this.width = 0;
+            this.height = 0;
+            break;
+        }
+        case 1: {
+            // new cv.Rect(rect)
+            var rect = arguments[0];
+            this.x = rect.x;
+            this.y = rect.y;
+            this.width = rect.width;
+            this.height = rect.height;
+            break;
+        }
+        case 2: {
+            // new cv.Rect(point, size)
+            var point = arguments[0];
+            var size = arguments[1];
+            this.x = point.x;
+            this.y = point.y;
+            this.width = size.width;
+            this.height = size.height;
+            break;
+        }
+        case 4: {
+            // new cv.Rect(x, y, width, height)
+            this.x = arguments[0];
+            this.y = arguments[1];
+            this.width = arguments[2];
+            this.height = arguments[3];
+            break;
+        }
+        default: {
+            throw new Error('Invalid arguments');
+        }
+    }
+}
+
+Module['Rect'] = Rect;
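+
+// The overloads above mirror the C++ constructors; for example (values are
+// illustrative):
+//   let r1 = new cv.Rect(10, 10, 100, 50); // x, y, width, height
+//   let r2 = new cv.Rect(new cv.Point(10, 10), new cv.Size(100, 50));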
+
+function RotatedRect() {
+    switch (arguments.length) {
+        case 0: {
+            this.center = {x: 0, y: 0};
+            this.size = {width: 0, height: 0};
+            this.angle = 0;
+            break;
+        }
+        case 3: {
+            this.center = arguments[0];
+            this.size = arguments[1];
+            this.angle = arguments[2];
+            break;
+        }
+        default: {
+            throw new Error('Invalid arguments');
+        }
+    }
+}
+
+RotatedRect.points = function(obj) {
+    return Module.rotatedRectPoints(obj);
+};
+
+RotatedRect.boundingRect = function(obj) {
+    return Module.rotatedRectBoundingRect(obj);
+};
+
+RotatedRect.boundingRect2f = function(obj) {
+    return Module.rotatedRectBoundingRect2f(obj);
+};
+
+Module['RotatedRect'] = RotatedRect;
+
+function Scalar(v0, v1, v2, v3) {
+    this.push(typeof(v0) === 'undefined' ? 0 : v0);
+    this.push(typeof(v1) === 'undefined' ? 0 : v1);
+    this.push(typeof(v2) === 'undefined' ? 0 : v2);
+    this.push(typeof(v3) === 'undefined' ? 0 : v3);
+}
+
+Scalar.prototype = new Array; // eslint-disable-line no-array-constructor
+
+Scalar.all = function(v) {
+    return new Scalar(v, v, v, v);
+};
+
+Module['Scalar'] = Scalar;
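+
+// Scalar behaves like a 4-element array; for example, an opaque red color
+// (the channel order here is illustrative and depends on the Mat's format):
+//   let red = new cv.Scalar(255, 0, 0, 255);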
+
+function MinMaxLoc() {
+    switch (arguments.length) {
+        case 0: {
+            this.minVal = 0;
+            this.maxVal = 0;
+            this.minLoc = new Point();
+            this.maxLoc = new Point();
+            break;
+        }
+        case 4: {
+            this.minVal = arguments[0];
+            this.maxVal = arguments[1];
+            this.minLoc = arguments[2];
+            this.maxLoc = arguments[3];
+            break;
+        }
+        default: {
+            throw new Error('Invalid arguments');
+        }
+    }
+}
+
+Module['MinMaxLoc'] = MinMaxLoc;
+
+function Circle() {
+    switch (arguments.length) {
+        case 0: {
+            this.center = new Point();
+            this.radius = 0;
+            break;
+        }
+        case 2: {
+            this.center = arguments[0];
+            this.radius = arguments[1];
+            break;
+        }
+        default: {
+            throw new Error('Invalid arguments');
+        }
+    }
+}
+
+Module['Circle'] = Circle;
+
+function TermCriteria() {
+    switch (arguments.length) {
+        case 0: {
+            this.type = 0;
+            this.maxCount = 0;
+            this.epsilon = 0;
+            break;
+        }
+        case 3: {
+            this.type = arguments[0];
+            this.maxCount = arguments[1];
+            this.epsilon = arguments[2];
+            break;
+        }
+        default: {
+            throw new Error('Invalid arguments');
+        }
+    }
+}
+
+Module['TermCriteria'] = TermCriteria;
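+
+// An illustrative criteria for iterative algorithms; the type value 3 assumes
+// the C++ flag values cv::TermCriteria::COUNT (1) + cv::TermCriteria::EPS (2):
+//   let criteria = new cv.TermCriteria(3, 10, 0.01); // type, maxCount, epsilon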
+
+Module['matFromArray'] = function(rows, cols, type, array) {
+    var mat = new cv.Mat(rows, cols, type);
+    switch (type) {
+        case cv.CV_8U:
+        case cv.CV_8UC1:
+        case cv.CV_8UC2:
+        case cv.CV_8UC3:
+        case cv.CV_8UC4: {
+            mat.data.set(array);
+            break;
+        }
+        case cv.CV_8S:
+        case cv.CV_8SC1:
+        case cv.CV_8SC2:
+        case cv.CV_8SC3:
+        case cv.CV_8SC4: {
+            mat.data8S.set(array);
+            break;
+        }
+        case cv.CV_16U:
+        case cv.CV_16UC1:
+        case cv.CV_16UC2:
+        case cv.CV_16UC3:
+        case cv.CV_16UC4: {
+            mat.data16U.set(array);
+            break;
+        }
+        case cv.CV_16S:
+        case cv.CV_16SC1:
+        case cv.CV_16SC2:
+        case cv.CV_16SC3:
+        case cv.CV_16SC4: {
+            mat.data16S.set(array);
+            break;
+        }
+        case cv.CV_32S:
+        case cv.CV_32SC1:
+        case cv.CV_32SC2:
+        case cv.CV_32SC3:
+        case cv.CV_32SC4: {
+            mat.data32S.set(array);
+            break;
+        }
+        case cv.CV_32F:
+        case cv.CV_32FC1:
+        case cv.CV_32FC2:
+        case cv.CV_32FC3:
+        case cv.CV_32FC4: {
+            mat.data32F.set(array);
+            break;
+        }
+        case cv.CV_64F:
+        case cv.CV_64FC1:
+        case cv.CV_64FC2:
+        case cv.CV_64FC3:
+        case cv.CV_64FC4: {
+            mat.data64F.set(array);
+            break;
+        }
+        default: {
+            throw new Error('Type is unsupported');
+        }
+    }
+    return mat;
+};
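+
+// A minimal usage sketch: build a 2x2 single-channel float matrix from a plain
+// JS array (values are illustrative):
+//   let mat = cv.matFromArray(2, 2, cv.CV_32FC1, [1, 2, 3, 4]);
+//   mat.delete();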
+
+Module['matFromImageData'] = function(imageData) {
+    var mat = new cv.Mat(imageData.height, imageData.width, cv.CV_8UC4);
+    mat.data.set(imageData.data);
+    return mat;
+};
diff --git a/modules/js/src/make_umd.py b/modules/js/src/make_umd.py
new file mode 100644 (file)
index 0000000..4bca6c1
--- /dev/null
@@ -0,0 +1,106 @@
+###############################################################################
+#
+#  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+#
+#  By downloading, copying, installing or using the software you agree to this license.
+#  If you do not agree to this license, do not download, install,
+#  copy or use the software.
+#
+#
+#                           License Agreement
+#                For Open Source Computer Vision Library
+#
+# Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+# Third party copyrights are property of their respective owners.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+#   * Redistribution's of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#   * Redistribution's in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+#   * The name of the copyright holders may not be used to endorse or promote products
+#     derived from this software without specific prior written permission.
+#
+# This software is provided by the copyright holders and contributors "as is" and
+# any express or implied warranties, including, but not limited to, the implied
+# warranties of merchantability and fitness for a particular purpose are disclaimed.
+# In no event shall the Intel Corporation or contributors be liable for any direct,
+# indirect, incidental, special, exemplary, or consequential damages
+# (including, but not limited to, procurement of substitute goods or services;
+# loss of use, data, or profits; or business interruption) however caused
+# and on any theory of liability, whether in contract, strict liability,
+# or tort (including negligence or otherwise) arising in any way out of
+# the use of this software, even if advised of the possibility of such damage.
+#
+
+###############################################################################
+# AUTHOR: Sajjad Taheri, University of California, Irvine. sajjadt[at]uci[dot]edu
+#
+#                             LICENSE AGREEMENT
+# Copyright (c) 2015, 2015 The Regents of the University of California (Regents)
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. Neither the name of the University nor the
+#    names of its contributors may be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+###############################################################################
+import sys
+
+def make_umd(opencvjs, cvjs):
+    with open(opencvjs, 'r') as src:
+        content = src.read()
+    with open(cvjs, 'w') as dst:
+        # inspired by https://github.com/umdjs/umd/blob/95563fd6b46f06bda0af143ff67292e7f6ede6b7/templates/returnExportsGlobal.js
+        dst.write(("""
+(function (root, factory) {
+  if (typeof define === 'function' && define.amd) {
+    // AMD. Register as an anonymous module.
+    define(function () {
+      return (root.cv = factory());
+    });
+  } else if (typeof module === 'object' && module.exports) {
+    // Node. Does not work with strict CommonJS, but
+    // only CommonJS-like environments that support module.exports,
+    // like Node.
+    module.exports = factory();
+  } else {
+    // Browser globals
+    root.cv = factory();
+  }
+}(this, function () {
+  %s
+  if (typeof Module === 'undefined')
+    Module = {};
+  return cv(Module);
+}));
+    """ % (content)).lstrip())
+
+if __name__ == "__main__":
+    if len(sys.argv) > 2:
+        opencvjs = sys.argv[1]
+        cvjs = sys.argv[2]
+        make_umd(opencvjs, cvjs)
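+
+# The wrapped cv.js produced above can then be consumed as a Node/CommonJS
+# module (let cv = require('./cv.js')), through an AMD loader, or from a plain
+# <script> tag that installs a global "cv".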
diff --git a/modules/js/src/templates.py b/modules/js/src/templates.py
new file mode 100644 (file)
index 0000000..05dc9bb
--- /dev/null
@@ -0,0 +1,192 @@
+###############################################################################
+#
+#  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+#
+#  By downloading, copying, installing or using the software you agree to this license.
+#  If you do not agree to this license, do not download, install,
+#  copy or use the software.
+#
+#
+#                           License Agreement
+#                For Open Source Computer Vision Library
+#
+# Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+# Third party copyrights are property of their respective owners.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+#   * Redistribution's of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#   * Redistribution's in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+#   * The name of the copyright holders may not be used to endorse or promote products
+#     derived from this software without specific prior written permission.
+#
+# This software is provided by the copyright holders and contributors "as is" and
+# any express or implied warranties, including, but not limited to, the implied
+# warranties of merchantability and fitness for a particular purpose are disclaimed.
+# In no event shall the Intel Corporation or contributors be liable for any direct,
+# indirect, incidental, special, exemplary, or consequential damages
+# (including, but not limited to, procurement of substitute goods or services;
+# loss of use, data, or profits; or business interruption) however caused
+# and on any theory of liability, whether in contract, strict liability,
+# or tort (including negligence or otherwise) arising in any way out of
+# the use of this software, even if advised of the possibility of such damage.
+#
+
+###############################################################################
+# AUTHOR: Sajjad Taheri, University of California, Irvine. sajjadt[at]uci[dot]edu
+#
+#                             LICENSE AGREEMENT
+# Copyright (c) 2015, 2015 The Regents of the University of California (Regents)
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. Neither the name of the University nor the
+#    names of its contributors may be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+##############################################################################
+
+from string import Template
+
+wrapper_codes_template = Template("namespace $ns {\n$defs\n}")
+
+call_template = Template("""$func($args)""")
+class_call_template = Template("""$obj.$func($args)""")
+static_class_call_template = Template("""$scope$func($args)""")
+
+wrapper_function_template = Template("""    $ret_val $func($signature)$const {
+        return $cpp_call;
+    }
+    """)
+
+wrapper_function_with_def_args_template = Template("""    $ret_val $func($signature)$const {
+        $check_args
+    }
+    """)
+
+wrapper_overload_def_values = [
+    Template("""return $cpp_call;"""), Template("""if ($arg0.isUndefined())
+            return $cpp_call;
+        else
+            $next"""),
+    Template("""if ($arg0.isUndefined() && $arg1.isUndefined())
+            return $cpp_call;
+        else $next"""),
+    Template("""if ($arg0.isUndefined() && $arg1.isUndefined() && $arg2.isUndefined())
+            return $cpp_call;
+        else $next"""),
+    Template("""if ($arg0.isUndefined() && $arg1.isUndefined() && $arg2.isUndefined() && $arg3.isUndefined())
+            return $cpp_call;
+        else $next"""),
+    Template("""if ($arg0.isUndefined() && $arg1.isUndefined() && $arg2.isUndefined() && $arg3.isUndefined() &&
+                    $arg4.isUndefined())
+            return $cpp_call;
+        else $next"""),
+    Template("""if ($arg0.isUndefined() && $arg1.isUndefined() && $arg2.isUndefined() && $arg3.isUndefined() &&
+                    $arg4.isUndefined() && $arg5.isUndefined() )
+            return $cpp_call;
+        else $next"""),
+    Template("""if ($arg0.isUndefined() && $arg1.isUndefined() && $arg2.isUndefined() && $arg3.isUndefined() &&
+                    $arg4.isUndefined() && $arg5.isUndefined() && $arg6.isUndefined() )
+            return $cpp_call;
+        else $next"""),
+    Template("""if ($arg0.isUndefined() && $arg1.isUndefined() && $arg2.isUndefined() && $arg3.isUndefined() &&
+                    $arg4.isUndefined() && $arg5.isUndefined()&& $arg6.isUndefined()  && $arg7.isUndefined())
+            return $cpp_call;
+        else $next"""),
+    Template("""if ($arg0.isUndefined() && $arg1.isUndefined() && $arg2.isUndefined() && $arg3.isUndefined() &&
+                    $arg4.isUndefined() && $arg5.isUndefined()&& $arg6.isUndefined()  && $arg7.isUndefined() &&
+                    $arg8.isUndefined())
+            return $cpp_call;
+        else $next"""),
+    Template("""if ($arg0.isUndefined() && $arg1.isUndefined() && $arg2.isUndefined() && $arg3.isUndefined() &&
+                    $arg4.isUndefined() && $arg5.isUndefined()&& $arg6.isUndefined()  && $arg7.isUndefined()&&
+                    $arg8.isUndefined() && $arg9.isUndefined())
+            return $cpp_call;
+        else $next""")]
+
+emscripten_binding_template = Template("""
+
+EMSCRIPTEN_BINDINGS($binding_name) {$bindings
+}
+""")
+
+simple_function_template = Template("""
+    emscripten::function("$js_name", &$cpp_name);
+""")
+
+smart_ptr_reg_template = Template("""
+        .smart_ptr<Ptr<$cname>>("Ptr<$name>")
+""")
+
+overload_function_template = Template("""
+    function("$js_name", select_overload<$ret($args)$const>(&$cpp_name)$optional);
+""")
+
+overload_class_function_template = Template("""
+        .function("$js_name", select_overload<$ret($args)$const>(&$cpp_name)$optional)""")
+
+overload_class_static_function_template = Template("""
+        .class_function("$js_name", select_overload<$ret($args)$const>(&$cpp_name)$optional)""")
+
+class_property_template = Template("""
+        .property("$js_name", &$cpp_name)""")
+
+ctr_template = Template("""
+        .constructor(select_overload<$ret($args)$const>(&$cpp_name)$optional)""")
+
+smart_ptr_ctr_overload_template = Template("""
+        .smart_ptr_constructor("$ptr_type", select_overload<$ret($args)$const>(&$cpp_name)$optional)""")
+
+function_template = Template("""
+        .function("$js_name", &$cpp_name)""")
+
+static_function_template = Template("""
+        .class_function("$js_name", &$cpp_name)""")
+
+constructor_template = Template("""
+        .constructor<$signature>()""")
+
+enum_item_template = Template("""
+        .value("$val", $cpp_val)""")
+
+enum_template = Template("""
+    emscripten::enum_<$cpp_name>("$js_name")$enum_items;
+""")
+
+const_template = Template("""
+    constant("$js_name", +$value);
+""")
+
+vector_template = Template("""
+     emscripten::register_vector<$cType>("$js_name");
+""")
+
+map_template = Template("""
+     emscripten::register_map<cpp_type_key,$cpp_type_val>("$js_name");
+""")
+
+class_template = Template("""
+    emscripten::class_<$cpp_name $derivation>("$js_name")$class_templates;
+""")
diff --git a/modules/js/test/.eslintrc.json b/modules/js/test/.eslintrc.json
new file mode 100644 (file)
index 0000000..e7d7209
--- /dev/null
@@ -0,0 +1,12 @@
+{
+  "extends": "google",
+  "parserOptions": {
+    "ecmaVersion": 6
+  },
+  "rules": {
+    "max-len": ["error", 100, {"ignoreUrls": true}],
+    "quotes": ["error", "single"],
+    "indent": ["error", 4, {"ArrayExpression": "first",
+                            "CallExpression": {"arguments": "first"}}]
+  }
+}
diff --git a/modules/js/test/package.json b/modules/js/test/package.json
new file mode 100644 (file)
index 0000000..cb303ba
--- /dev/null
@@ -0,0 +1,26 @@
+{
+  "name": "opencv_js_tests",
+  "description": "Tests for opencv js bindings",
+  "version": "1.0.0",
+  "dependencies" : {
+    "qunit" : "latest"
+  },
+  "devDependencies": {
+    "eslint" : "latest",
+    "eslint-config-google" : "latest"
+  },
+  "scripts": {
+    "test": "node tests.js"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/opencv/opencv.git"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "BSD-4-Clause",
+  "bugs": {
+    "url": "https://github.com/opencv/opencv/issues"
+  },
+  "homepage": "https://github.com/opencv/opencv"
+}
diff --git a/modules/js/test/test_imgproc.js b/modules/js/test/test_imgproc.js
new file mode 100644 (file)
index 0000000..214a073
--- /dev/null
@@ -0,0 +1,807 @@
+// //////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//
+
+// //////////////////////////////////////////////////////////////////////////////////////
+// Author: Sajjad Taheri, University of California, Irvine. sajjadt[at]uci[dot]edu
+//
+//                             LICENSE AGREEMENT
+// Copyright (c) 2015 The Regents of the University of California (Regents)
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+// 3. Neither the name of the University nor the
+//    names of its contributors may be used to endorse or promote products
+//    derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+if (typeof module !== 'undefined' && module.exports) {
+    // The environment is Node.js
+    var cv = require('./opencv.js'); // eslint-disable-line no-var
+}
+
+QUnit.module('Image Processing', {});
+
+QUnit.test('test_imgProc', function(assert) {
+    // calcHist
+    {
+        let vec1 = new cv.Mat.ones(new cv.Size(20, 20), cv.CV_8UC1); // eslint-disable-line new-cap
+        let source = new cv.MatVector();
+        source.push_back(vec1);
+        let channels = [0];
+        let histSize = [256];
+        let ranges = [0, 256];
+
+        let hist = new cv.Mat();
+        let mask = new cv.Mat();
+        let binSize = cv._malloc(4);
+        let binView = new Int32Array(cv.HEAP8.buffer, binSize);
+        binView[0] = 10;
+        cv.calcHist(source, channels, mask, hist, histSize, ranges, false);
+
+        // hist should contain an N x 1 array.
+        let size = hist.size();
+        assert.equal(size.height, 256);
+        assert.equal(size.width, 1);
+
+        // default parameters
+        cv.calcHist(source, channels, mask, hist, histSize, ranges);
+        size = hist.size();
+        assert.equal(size.height, 256);
+        assert.equal(size.width, 1);
+
+        // Do we need to verify data in histogram?
+        // let dataView = hist.data;
+
+        // Free resources
+        cv._free(binSize);
+        mask.delete();
+        hist.delete();
+        source.delete();
+        vec1.delete();
+    }
+
+    // cvtColor
+    {
+        let source = new cv.Mat(10, 10, cv.CV_8UC3);
+        let dest = new cv.Mat();
+
+        cv.cvtColor(source, dest, cv.COLOR_BGR2GRAY, 0);
+        assert.equal(dest.channels(), 1);
+
+        cv.cvtColor(source, dest, cv.COLOR_BGR2GRAY);
+        assert.equal(dest.channels(), 1);
+
+        cv.cvtColor(source, dest, cv.COLOR_BGR2BGRA, 0);
+        assert.equal(dest.channels(), 4);
+
+        cv.cvtColor(source, dest, cv.COLOR_BGR2BGRA);
+        assert.equal(dest.channels(), 4);
+
+        dest.delete();
+        source.delete();
+    }
+    // equalizeHist
+    {
+        let source = new cv.Mat(10, 10, cv.CV_8UC1);
+        let dest = new cv.Mat();
+
+        cv.equalizeHist(source, dest);
+
+        // equalizeHist changes the content of an image, but does not alter its
+        // metadata.
+        assert.equal(source.channels(), dest.channels());
+        assert.equal(source.type(), dest.type());
+
+        dest.delete();
+        source.delete();
+    }
+});
+
+QUnit.test('test_segmentation', function(assert) {
+    const THRESHOLD = 127.0;
+    const THRESHOLD_MAX = 210.0;
+
+    // threshold
+    {
+        let source = new cv.Mat(1, 5, cv.CV_8UC1);
+        let sourceView = source.data;
+        sourceView[0] = 0; // < threshold
+        sourceView[1] = 100; // < threshold
+        sourceView[2] = 200; // > threshold
+
+        let dest = new cv.Mat();
+
+        cv.threshold(source, dest, THRESHOLD, THRESHOLD_MAX, cv.THRESH_BINARY);
+
+        let destView = dest.data;
+        assert.equal(destView[0], 0);
+        assert.equal(destView[1], 0);
+        assert.equal(destView[2], THRESHOLD_MAX);
+
+        source.delete();
+        dest.delete();
+    }
+
+    // adaptiveThreshold
+    {
+        let source = cv.Mat.zeros(1, 5, cv.CV_8UC1);
+        let sourceView = source.data;
+        sourceView[0] = 50;
+        sourceView[1] = 150;
+        sourceView[2] = 200;
+
+        let dest = new cv.Mat();
+        const C = 0;
+        const blockSize = 3;
+        cv.adaptiveThreshold(source, dest, THRESHOLD_MAX,
+                             cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, blockSize, C);
+
+        let destView = dest.data;
+        assert.equal(destView[0], 0);
+        assert.equal(destView[1], THRESHOLD_MAX);
+        assert.equal(destView[2], THRESHOLD_MAX);
+
+        source.delete();
+        dest.delete();
+    }
+});
+
+QUnit.test('test_shape', function(assert) {
+    // moments
+    {
+        let points = new cv.Mat(1, 4, cv.CV_32SC2);
+        let data32S = points.data32S;
+        data32S[0] = 50;
+        data32S[1] = 56;
+        data32S[2] = 53;
+        data32S[3] = 53;
+        data32S[4] = 46;
+        data32S[5] = 54;
+        data32S[6] = 49;
+        data32S[7] = 51;
+
+        let m = cv.moments(points, false);
+        let area = cv.contourArea(points, false);
+
+        assert.equal(m.m00, 0);
+        assert.equal(m.m01, 0);
+        assert.equal(m.m10, 0);
+        assert.equal(area, 0);
+
+        // default parameters
+        m = cv.moments(points);
+        area = cv.contourArea(points);
+        assert.equal(m.m00, 0);
+        assert.equal(m.m01, 0);
+        assert.equal(m.m10, 0);
+        assert.equal(area, 0);
+
+        points.delete();
+    }
+});
+
+QUnit.test('test_min_enclosing', function(assert) {
+    {
+        let points = new cv.Mat(4, 1, cv.CV_32FC2);
+
+        points.data32F[0] = 0;
+        points.data32F[1] = 0;
+        points.data32F[2] = 1;
+        points.data32F[3] = 0;
+        points.data32F[4] = 1;
+        points.data32F[5] = 1;
+        points.data32F[6] = 0;
+        points.data32F[7] = 1;
+
+        let circle = cv.minEnclosingCircle(points);
+
+        assert.deepEqual(circle.center, {x: 0.5, y: 0.5});
+        assert.ok(Math.abs(circle.radius - Math.sqrt(2) / 2) < 0.001);
+
+        points.delete();
+    }
+});
+
+QUnit.test('test_filter', function(assert) {
+    // blur
+    {
+        let mat1 = cv.Mat.ones(5, 5, cv.CV_8UC3);
+        let mat2 = new cv.Mat();
+
+        cv.blur(mat1, mat2, {height: 3, width: 3}, {x: -1, y: -1}, cv.BORDER_DEFAULT);
+
+        // Verify result.
+        let size = mat2.size();
+        assert.equal(mat2.channels(), 3);
+        assert.equal(size.height, 5);
+        assert.equal(size.width, 5);
+
+        cv.blur(mat1, mat2, {height: 3, width: 3}, {x: -1, y: -1});
+
+        // Verify result.
+        size = mat2.size();
+        assert.equal(mat2.channels(), 3);
+        assert.equal(size.height, 5);
+        assert.equal(size.width, 5);
+
+        cv.blur(mat1, mat2, {height: 3, width: 3});
+
+        // Verify result.
+        size = mat2.size();
+        assert.equal(mat2.channels(), 3);
+        assert.equal(size.height, 5);
+        assert.equal(size.width, 5);
+
+        mat1.delete();
+        mat2.delete();
+    }
+
+    // GaussianBlur
+    {
+        let mat1 = cv.Mat.ones(7, 7, cv.CV_8UC1);
+        let mat2 = new cv.Mat();
+
+        cv.GaussianBlur(mat1, mat2, new cv.Size(3, 3), 0, 0, // eslint-disable-line new-cap
+                        cv.BORDER_DEFAULT);
+
+        // Verify result.
+        let size = mat2.size();
+        assert.equal(mat2.channels(), 1);
+        assert.equal(size.height, 7);
+        assert.equal(size.width, 7);
+
+        mat1.delete();
+        mat2.delete();
+    }
+
+    // medianBlur
+    {
+        let mat1 = cv.Mat.ones(9, 9, cv.CV_8UC3);
+        let mat2 = new cv.Mat();
+
+        cv.medianBlur(mat1, mat2, 3);
+
+        // Verify result.
+        let size = mat2.size();
+        assert.equal(mat2.channels(), 3);
+        assert.equal(size.height, 9);
+        assert.equal(size.width, 9);
+
+        mat1.delete();
+        mat2.delete();
+    }
+
+    // Transpose
+    {
+        let mat1 = cv.Mat.eye(9, 9, cv.CV_8UC3);
+        let mat2 = new cv.Mat();
+
+        cv.transpose(mat1, mat2);
+
+        // Verify result.
+        let size = mat2.size();
+        assert.equal(mat2.channels(), 3);
+        assert.equal(size.height, 9);
+        assert.equal(size.width, 9);
+
+        mat1.delete();
+        mat2.delete();
+    }
+
+    // bilateralFilter
+    {
+        let mat1 = cv.Mat.ones(11, 11, cv.CV_8UC3);
+        let mat2 = new cv.Mat();
+
+        cv.bilateralFilter(mat1, mat2, 3, 6, 1.5, cv.BORDER_DEFAULT);
+
+        // Verify result.
+        let size = mat2.size();
+        assert.equal(mat2.channels(), 3);
+        assert.equal(size.height, 11);
+        assert.equal(size.width, 11);
+
+        // default parameters
+        cv.bilateralFilter(mat1, mat2, 3, 6, 1.5);
+        // Verify result.
+        size = mat2.size();
+        assert.equal(mat2.channels(), 3);
+        assert.equal(size.height, 11);
+        assert.equal(size.width, 11);
+
+        mat1.delete();
+        mat2.delete();
+    }
+
+    // Watershed
+    {
+        let mat = cv.Mat.ones(11, 11, cv.CV_8UC3);
+        let out = new cv.Mat(11, 11, cv.CV_32SC1);
+
+        cv.watershed(mat, out);
+
+        // Verify result.
+        let size = out.size();
+        assert.equal(out.channels(), 1);
+        assert.equal(size.height, 11);
+        assert.equal(size.width, 11);
+        assert.equal(out.elemSize1(), 4);
+
+        mat.delete();
+        out.delete();
+    }
+
+    // Concat
+    {
+        let mat = cv.Mat.ones({height: 10, width: 5}, cv.CV_8UC3);
+        let mat2 = cv.Mat.eye({height: 10, width: 5}, cv.CV_8UC3);
+        let mat3 = cv.Mat.eye({height: 10, width: 5}, cv.CV_8UC3);
+
+
+        let out = new cv.Mat();
+        let input = new cv.MatVector();
+        input.push_back(mat);
+        input.push_back(mat2);
+        input.push_back(mat3);
+
+        cv.vconcat(input, out);
+
+        // Verify result.
+        let size = out.size();
+        assert.equal(out.channels(), 3);
+        assert.equal(size.height, 30);
+        assert.equal(size.width, 5);
+        assert.equal(out.elemSize1(), 1);
+
+        cv.hconcat(input, out);
+
+        // Verify result.
+        size = out.size();
+        assert.equal(out.channels(), 3);
+        assert.equal(size.height, 10);
+        assert.equal(size.width, 15);
+        assert.equal(out.elemSize1(), 1);
+
+        mat.delete();
+        mat2.delete();
+        mat3.delete();
+        input.delete();
+        out.delete();
+    }
+
+
+    // distanceTransform variants
+    {
+        let mat = cv.Mat.ones(11, 11, cv.CV_8UC1);
+        let out = new cv.Mat(11, 11, cv.CV_32FC1);
+        let labels = new cv.Mat(11, 11, cv.CV_32FC1);
+        const maskSize = 3;
+        cv.distanceTransform(mat, out, cv.DIST_L2, maskSize, cv.CV_32F);
+
+        // Verify result.
+        let size = out.size();
+        assert.equal(out.channels(), 1);
+        assert.equal(size.height, 11);
+        assert.equal(size.width, 11);
+        assert.equal(out.elemSize1(), 4);
+
+
+        cv.distanceTransformWithLabels(mat, out, labels, cv.DIST_L2, maskSize,
+                                       cv.DIST_LABEL_CCOMP);
+
+        // Verify result.
+        size = out.size();
+        assert.equal(out.channels(), 1);
+        assert.equal(size.height, 11);
+        assert.equal(size.width, 11);
+        assert.equal(out.elemSize1(), 4);
+
+        size = labels.size();
+        assert.equal(labels.channels(), 1);
+        assert.equal(size.height, 11);
+        assert.equal(size.width, 11);
+        assert.equal(labels.elemSize1(), 4);
+
+        mat.delete();
+        out.delete();
+        labels.delete();
+    }
+
+    // Min, Max
+    {
+        let data1 = new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8, 9]);
+        let data2 = new Uint8Array([0, 4, 0, 8, 0, 12, 0, 16, 0]);
+
+        let expectedMin = new Uint8Array([0, 2, 0, 4, 0, 6, 0, 8, 0]);
+        let expectedMax = new Uint8Array([1, 4, 3, 8, 5, 12, 7, 16, 9]);
+
+        let dataPtr = cv._malloc(3*3*1);
+        let dataPtr2 = cv._malloc(3*3*1);
+
+        let dataHeap = new Uint8Array(cv.HEAPU8.buffer, dataPtr, 3*3*1);
+        dataHeap.set(new Uint8Array(data1.buffer));
+
+        let dataHeap2 = new Uint8Array(cv.HEAPU8.buffer, dataPtr2, 3*3*1);
+        dataHeap2.set(new Uint8Array(data2.buffer));
+
+
+        let mat1 = new cv.Mat(3, 3, cv.CV_8UC1, dataPtr, 0);
+        let mat2 = new cv.Mat(3, 3, cv.CV_8UC1, dataPtr2, 0);
+
+        let mat3 = new cv.Mat();
+
+        cv.min(mat1, mat2, mat3);
+        // Verify result.
+        let size = mat2.size();
+        assert.equal(mat2.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(mat3.data, expectedMin);
+
+
+        cv.max(mat1, mat2, mat3);
+        // Verify result.
+        size = mat2.size();
+        assert.equal(mat2.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(mat3.data, expectedMax);
+
+        mat1.delete();
+        mat2.delete();
+        mat3.delete();
+        cv._free(dataPtr);
+        cv._free(dataPtr2);
+    }
+
+    // Bitwise operations
+    {
+        let data1 = new Uint8Array([0, 1, 2, 4, 8, 16, 32, 64, 128]);
+        let data2 = new Uint8Array([255, 255, 255, 255, 255, 255, 255, 255, 255]);
+
+        let expectedAnd = new Uint8Array([0, 1, 2, 4, 8, 16, 32, 64, 128]);
+        let expectedOr = new Uint8Array([255, 255, 255, 255, 255, 255, 255, 255, 255]);
+        let expectedXor = new Uint8Array([255, 254, 253, 251, 247, 239, 223, 191, 127]);
+
+        let expectedNot = new Uint8Array([255, 254, 253, 251, 247, 239, 223, 191, 127]);
+
+        let dataPtr = cv._malloc(3*3*1);
+        let dataPtr2 = cv._malloc(3*3*1);
+
+        let dataHeap = new Uint8Array(cv.HEAPU8.buffer, dataPtr, 3*3*1);
+        dataHeap.set(new Uint8Array(data1.buffer));
+
+        let dataHeap2 = new Uint8Array(cv.HEAPU8.buffer, dataPtr2, 3*3*1);
+        dataHeap2.set(new Uint8Array(data2.buffer));
+
+
+        let mat1 = new cv.Mat(3, 3, cv.CV_8UC1, dataPtr, 0);
+        let mat2 = new cv.Mat(3, 3, cv.CV_8UC1, dataPtr2, 0);
+
+        let mat3 = new cv.Mat();
+        let none = new cv.Mat();
+
+        cv.bitwise_not(mat1, mat3, none);
+        // Verify result.
+        let size = mat3.size();
+        assert.equal(mat3.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(mat3.data, expectedNot);
+
+        cv.bitwise_and(mat1, mat2, mat3, none);
+        // Verify result.
+        size = mat3.size();
+        assert.equal(mat3.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(mat3.data, expectedAnd);
+
+
+        cv.bitwise_or(mat1, mat2, mat3, none);
+        // Verify result.
+        size = mat3.size();
+        assert.equal(mat3.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(mat3.data, expectedOr);
+
+        cv.bitwise_xor(mat1, mat2, mat3, none);
+        // Verify result.
+        size = mat3.size();
+        assert.equal(mat3.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(mat3.data, expectedXor);
+
+        mat1.delete();
+        mat2.delete();
+        mat3.delete();
+        none.delete();
+        cv._free(dataPtr);
+        cv._free(dataPtr2);
+    }
+
+    // Arithmetic operations
+    {
+        let data1 = new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7, 8]);
+        let data2 = new Uint8Array([0, 2, 4, 6, 8, 10, 12, 14, 16]);
+        let data3 = new Uint8Array([0, 1, 0, 1, 0, 1, 0, 1, 0]);
+
+        // |data1 - data2|
+        let expectedAbsDiff = new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7, 8]);
+        let expectedAdd = new Uint8Array([0, 3, 6, 9, 12, 15, 18, 21, 24]);
+
+        const alpha = 4;
+        const beta = -1;
+        const gamma = 3;
+        // 4*data1 - data2 + 3
+        let expectedWeightedAdd = new Uint8Array([3, 5, 7, 9, 11, 13, 15, 17, 19]);
+
+        let dataPtr = cv._malloc(3*3*1);
+        let dataPtr2 = cv._malloc(3*3*1);
+        let dataPtr3 = cv._malloc(3*3*1);
+
+        let dataHeap = new Uint8Array(cv.HEAPU8.buffer, dataPtr, 3*3*1);
+        dataHeap.set(new Uint8Array(data1.buffer));
+        let dataHeap2 = new Uint8Array(cv.HEAPU8.buffer, dataPtr2, 3*3*1);
+        dataHeap2.set(new Uint8Array(data2.buffer));
+        let dataHeap3 = new Uint8Array(cv.HEAPU8.buffer, dataPtr3, 3*3*1);
+        dataHeap3.set(new Uint8Array(data3.buffer));
+
+        let mat1 = new cv.Mat(3, 3, cv.CV_8UC1, dataPtr, 0);
+        let mat2 = new cv.Mat(3, 3, cv.CV_8UC1, dataPtr2, 0);
+        let mat3 = new cv.Mat(3, 3, cv.CV_8UC1, dataPtr3, 0);
+
+        let dst = new cv.Mat();
+        let none = new cv.Mat();
+
+        cv.absdiff(mat1, mat2, dst);
+        // Verify result.
+        let size = dst.size();
+        assert.equal(dst.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(dst.data, expectedAbsDiff);
+
+        cv.add(mat1, mat2, dst, none, -1);
+        // Verify result.
+        size = dst.size();
+        assert.equal(dst.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(dst.data, expectedAdd);
+
+        cv.addWeighted(mat1, alpha, mat2, beta, gamma, dst, -1);
+        // Verify result.
+        size = dst.size();
+        assert.equal(dst.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(dst.data, expectedWeightedAdd);
+
+        // default parameter
+        cv.addWeighted(mat1, alpha, mat2, beta, gamma, dst);
+        // Verify result.
+        size = dst.size();
+        assert.equal(dst.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+
+        assert.deepEqual(dst.data, expectedWeightedAdd);
+
+        mat1.delete();
+        mat2.delete();
+        mat3.delete();
+        dst.delete();
+        none.delete();
+        cv._free(dataPtr);
+        cv._free(dataPtr2);
+        cv._free(dataPtr3);
+    }
+
+    // Integral variants
+    {
+        let mat = cv.Mat.eye({height: 100, width: 100}, cv.CV_8UC3);
+        let sum = new cv.Mat();
+        let sqSum = new cv.Mat();
+        let title = new cv.Mat();
+
+        cv.integral(mat, sum, -1);
+
+        // Verify result.
+        let size = sum.size();
+        assert.equal(sum.channels(), 3);
+        assert.equal(size.height, 100+1);
+        assert.equal(size.width, 100+1);
+
+        cv.integral2(mat, sum, sqSum, -1, -1);
+        // Verify result.
+        size = sum.size();
+        assert.equal(sum.channels(), 3);
+        assert.equal(size.height, 100+1);
+        assert.equal(size.width, 100+1);
+
+        size = sqSum.size();
+        assert.equal(sqSum.channels(), 3);
+        assert.equal(size.height, 100+1);
+        assert.equal(size.width, 100+1);
+
+        mat.delete();
+        sum.delete();
+        sqSum.delete();
+        title.delete();
+    }
+
+    // Invert
+    {
+        let inv1 = new cv.Mat();
+        let inv2 = new cv.Mat();
+        let inv3 = new cv.Mat();
+        let inv4 = new cv.Mat();
+
+
+        let data1 = new Float32Array([1, 0, 0,
+                                      0, 1, 0,
+                                      0, 0, 1]);
+        let data2 = new Float32Array([0, 0, 0,
+                                      0, 5, 0,
+                                      0, 0, 0]);
+        let data3 = new Float32Array([1, 1, 1, 0,
+                                      0, 3, 1, 2,
+                                      2, 3, 1, 0,
+                                      1, 0, 2, 1]);
+        let data4 = new Float32Array([1, 4, 5,
+                                      4, 2, 2,
+                                      5, 2, 2]);
+
+        let expected1 = new Float32Array([1, 0, 0,
+                                          0, 1, 0,
+                                          0, 0, 1]);
+        // No expected2: data2 is singular, so its inverse does not exist.
+        let expected3 = new Float32Array([-3, -1/2, 3/2, 1,
+                                          1, 1/4, -1/4, -1/2,
+                                          3, 1/4, -5/4, -1/2,
+                                          -3, 0, 1, 1]);
+        let expected4 = new Float32Array([0, -1, 1,
+                                          -1, 23/2, -9,
+                                          1, -9, 7]);
+
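+        // Allocate buffers on the Emscripten heap (4 bytes per float) so the
+        // Mats below can wrap the data in place; a step of 0 means AUTO_STEP.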
+        let dataPtr1 = cv._malloc(3*3*4);
+        let dataPtr2 = cv._malloc(3*3*4);
+        let dataPtr3 = cv._malloc(4*4*4);
+        let dataPtr4 = cv._malloc(3*3*4);
+
+        let dataHeap = new Float32Array(cv.HEAP32.buffer, dataPtr1, 3*3);
+        dataHeap.set(new Float32Array(data1.buffer));
+        let dataHeap2 = new Float32Array(cv.HEAP32.buffer, dataPtr2, 3*3);
+        dataHeap2.set(new Float32Array(data2.buffer));
+        let dataHeap3 = new Float32Array(cv.HEAP32.buffer, dataPtr3, 4*4);
+        dataHeap3.set(new Float32Array(data3.buffer));
+        let dataHeap4 = new Float32Array(cv.HEAP32.buffer, dataPtr4, 3*3);
+        dataHeap4.set(new Float32Array(data4.buffer));
+
+        let mat1 = new cv.Mat(3, 3, cv.CV_32FC1, dataPtr1, 0);
+        let mat2 = new cv.Mat(3, 3, cv.CV_32FC1, dataPtr2, 0);
+        let mat3 = new cv.Mat(4, 4, cv.CV_32FC1, dataPtr3, 0);
+        let mat4 = new cv.Mat(3, 3, cv.CV_32FC1, dataPtr4, 0);
+
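+        // Custom QUnit assertion: element-wise comparison with an absolute
+        // tolerance, since floating-point inversion is not bit-exact.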
+        QUnit.assert.deepEqualWithTolerance = function(value, expected, tolerance) {
+            for (let i = 0; i < value.length; i++) {
+                this.pushResult({
+                    result: Math.abs(value[i] - expected[i]) < tolerance,
+                    actual: value[i],
+                    expected: expected[i],
+                });
+            }
+        };
+
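+        // The last argument selects the decomposition:
+        // 0 = DECOMP_LU, 1 = DECOMP_SVD, 2 = DECOMP_EIG, 3 = DECOMP_CHOLESKY.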
+        cv.invert(mat1, inv1, 0);
+        // Verify result.
+        let size = inv1.size();
+        assert.equal(inv1.channels(), 1);
+        assert.equal(size.height, 3);
+        assert.equal(size.width, 3);
+        assert.deepEqualWithTolerance(inv1.data32F, expected1, 0.0001);
+
+
+        // mat2 is singular, so no inverse exists; exercise the call without
+        // checking the output.
+        cv.invert(mat2, inv2, 0);
+
+        cv.invert(mat3, inv3, 0);
+        // Verify result.
+        size = inv3.size();
+        assert.equal(inv3.channels(), 1);
+        assert.equal(size.height, 4);
+        assert.equal(size.width, 4);
+        assert.deepEqualWithTolerance(inv3.data32F, expected3, 0.0001);
+
+        cv.invert(mat3, inv3, 1);
+        // Verify result.
+        assert.deepEqualWithTolerance(inv3.data32F, expected3, 0.0001);
+
+        cv.invert(mat4, inv4, 2);
+        // Verify result.
+        assert.deepEqualWithTolerance(inv4.data32F, expected4, 0.0001);
+
+        cv.invert(mat4, inv4, 3);
+        // Verify result.
+        assert.deepEqualWithTolerance(inv4.data32F, expected4, 0.0001);
+
+        mat1.delete();
+        mat2.delete();
+        mat3.delete();
+        mat4.delete();
+        inv1.delete();
+        inv2.delete();
+        inv3.delete();
+        inv4.delete();
+    }
+});
diff --git a/modules/js/test/test_mat.js b/modules/js/test/test_mat.js
new file mode 100644 (file)
index 0000000..2572fbd
--- /dev/null
@@ -0,0 +1,987 @@
+// //////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//
+
+// //////////////////////////////////////////////////////////////////////////////////////
+// Author: Sajjad Taheri, University of California, Irvine. sajjadt[at]uci[dot]edu
+//
+//                             LICENSE AGREEMENT
+// Copyright (c) 2015 The Regents of the University of California (Regents)
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+// 3. Neither the name of the University nor the
+//    names of its contributors may be used to endorse or promote products
+//    derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+if (typeof module !== 'undefined' && module.exports) {
+    // The environment is Node.js
+    var cv = require('./opencv.js'); // eslint-disable-line no-var
+}
+
+QUnit.module('Core', {});
+
+QUnit.test('test_mat_creation', function(assert) {
+    // Mat constructors.
+    // Mat::Mat(int rows, int cols, int type)
+    {
+        let mat = new cv.Mat(10, 20, cv.CV_8UC3);
+
+        assert.equal(mat.type(), cv.CV_8UC3);
+        assert.equal(mat.depth(), cv.CV_8U);
+        assert.equal(mat.channels(), 3);
+        assert.ok(mat.empty() === false);
+
+        let size = mat.size();
+        assert.equal(size.height, 10);
+        assert.equal(size.width, 20);
+
+        mat.delete();
+    }
+
+    // Mat::Mat(const Mat &)
+    {
+        // Copy from another Mat
+        let mat1 = new cv.Mat(10, 20, cv.CV_8UC3);
+        let mat2 = new cv.Mat(mat1);
+
+        assert.equal(mat2.type(), mat1.type());
+        assert.equal(mat2.depth(), mat1.depth());
+        assert.equal(mat2.channels(), mat1.channels());
+        assert.equal(mat2.empty(), mat1.empty());
+
+        let size1 = mat1.size();
+        let size2 = mat2.size();
+        assert.ok(size1.height === size2.height);
+        assert.ok(size1.width === size2.width);
+
+        mat1.delete();
+        mat2.delete();
+    }
+
+    // Mat::Mat(int rows, int cols, int type, void *data, size_t step=AUTO_STEP)
+    {
+        // 10 * 10 and one channel
+        let data = cv._malloc(10 * 10 * 1);
+        let mat = new cv.Mat(10, 10, cv.CV_8UC1, data, 0);
+
+        assert.equal(mat.type(), cv.CV_8UC1);
+        assert.equal(mat.depth(), cv.CV_8U);
+        assert.equal(mat.channels(), 1);
+        assert.ok(mat.empty() === false);
+
+        let size = mat.size();
+        assert.ok(size.height === 10);
+        assert.ok(size.width === 10);
+
+        mat.delete();
+    }
+
+    // Mat::Mat(int rows, int cols, int type, const Scalar& scalar)
+    {
+        // 2 * 2 8UC4 mat
+        let mat = new cv.Mat(2, 2, cv.CV_8UC4, [0, 1, 2, 3]);
+
+        for (let r = 0; r < mat.rows; r++) {
+            for (let c = 0; c < mat.cols; c++) {
+                let element = mat.ptr(r, c);
+                assert.equal(element[0], 0);
+                assert.equal(element[1], 1);
+                assert.equal(element[2], 2);
+                assert.equal(element[3], 3);
+            }
+        }
+
+        mat.delete();
+    }
+
+    //  Mat::create(int, int, int)
+    {
+        let mat = new cv.Mat();
+        mat.create(10, 5, cv.CV_8UC3);
+        let size = mat.size();
+
+        assert.ok(mat.type() === cv.CV_8UC3);
+        assert.ok(size.height === 10);
+        assert.ok(size.width === 5);
+        assert.ok(mat.channels() === 3);
+
+        mat.delete();
+    }
+    //  Mat::create(Size, int)
+    {
+        let mat = new cv.Mat();
+        mat.create({height: 10, width: 5}, cv.CV_8UC4);
+        let size = mat.size();
+
+        assert.ok(mat.type() === cv.CV_8UC4);
+        assert.ok(size.height === 10);
+        assert.ok(size.width === 5);
+        assert.ok(mat.channels() === 4);
+
+        mat.delete();
+    }
+    //   clone
+    {
+        let mat = cv.Mat.ones(5, 5, cv.CV_8UC1);
+        let mat2 = mat.clone();
+
+        assert.equal(mat.channels(), mat2.channels());
+        assert.equal(mat.size().height, mat2.size().height);
+        assert.equal(mat.size().width, mat2.size().width);
+
+        assert.deepEqual(mat.data, mat2.data);
+
+
+        mat.delete();
+        mat2.delete();
+    }
+    // copyTo
+    {
+        let mat = cv.Mat.ones(5, 5, cv.CV_8UC1);
+        let mat2 = new cv.Mat();
+        mat.copyTo(mat2);
+
+        assert.equal(mat.channels(), mat2.channels());
+        assert.equal(mat.size().height, mat2.size().height);
+        assert.equal(mat.size().width, mat2.size().width);
+
+        assert.deepEqual(mat.data, mat2.data);
+
+
+        mat.delete();
+        mat2.delete();
+    }
+    // copyTo1
+    {
+        let mat = cv.Mat.ones(5, 5, cv.CV_8UC1);
+        let mat2 = new cv.Mat();
+        let mask = new cv.Mat(5, 5, cv.CV_8UC1, new cv.Scalar(1));
+        mat.copyTo(mat2, mask);
+
+        assert.equal(mat.channels(), mat2.channels());
+        assert.equal(mat.size().height, mat2.size().height);
+        assert.equal(mat.size().width, mat2.size().width);
+
+        assert.deepEqual(mat.data, mat2.data);
+
+
+        mat.delete();
+        mat2.delete();
+        mask.delete();
+    }
+
+    // matFromArray
+    {
+        let arrayC1 = [0, -1, 2, -3];
+        let arrayC2 = [0, -1, 2, -3, 4, -5, 6, -7];
+        let arrayC3 = [0, -1, 2, -3, 4, -5, 6, -7, 9, -9, 10, -11];
+        let arrayC4 = [0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, 13, 14, 15];
+
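+        // cv.matFromArray copies a plain JS array into a new Mat; values are
+        // converted with typed-array semantics, so e.g. -1 wraps to 255 for
+        // CV_8U (the asserts below rely on this).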
+        let mat8UC1 = cv.matFromArray(2, 2, cv.CV_8UC1, arrayC1);
+        let mat8UC2 = cv.matFromArray(2, 2, cv.CV_8UC2, arrayC2);
+        let mat8UC3 = cv.matFromArray(2, 2, cv.CV_8UC3, arrayC3);
+        let mat8UC4 = cv.matFromArray(2, 2, cv.CV_8UC4, arrayC4);
+
+        let mat8SC1 = cv.matFromArray(2, 2, cv.CV_8SC1, arrayC1);
+        let mat8SC2 = cv.matFromArray(2, 2, cv.CV_8SC2, arrayC2);
+        let mat8SC3 = cv.matFromArray(2, 2, cv.CV_8SC3, arrayC3);
+        let mat8SC4 = cv.matFromArray(2, 2, cv.CV_8SC4, arrayC4);
+
+        let mat16UC1 = cv.matFromArray(2, 2, cv.CV_16UC1, arrayC1);
+        let mat16UC2 = cv.matFromArray(2, 2, cv.CV_16UC2, arrayC2);
+        let mat16UC3 = cv.matFromArray(2, 2, cv.CV_16UC3, arrayC3);
+        let mat16UC4 = cv.matFromArray(2, 2, cv.CV_16UC4, arrayC4);
+
+        let mat16SC1 = cv.matFromArray(2, 2, cv.CV_16SC1, arrayC1);
+        let mat16SC2 = cv.matFromArray(2, 2, cv.CV_16SC2, arrayC2);
+        let mat16SC3 = cv.matFromArray(2, 2, cv.CV_16SC3, arrayC3);
+        let mat16SC4 = cv.matFromArray(2, 2, cv.CV_16SC4, arrayC4);
+
+        let mat32SC1 = cv.matFromArray(2, 2, cv.CV_32SC1, arrayC1);
+        let mat32SC2 = cv.matFromArray(2, 2, cv.CV_32SC2, arrayC2);
+        let mat32SC3 = cv.matFromArray(2, 2, cv.CV_32SC3, arrayC3);
+        let mat32SC4 = cv.matFromArray(2, 2, cv.CV_32SC4, arrayC4);
+
+        let mat32FC1 = cv.matFromArray(2, 2, cv.CV_32FC1, arrayC1);
+        let mat32FC2 = cv.matFromArray(2, 2, cv.CV_32FC2, arrayC2);
+        let mat32FC3 = cv.matFromArray(2, 2, cv.CV_32FC3, arrayC3);
+        let mat32FC4 = cv.matFromArray(2, 2, cv.CV_32FC4, arrayC4);
+
+        let mat64FC1 = cv.matFromArray(2, 2, cv.CV_64FC1, arrayC1);
+        let mat64FC2 = cv.matFromArray(2, 2, cv.CV_64FC2, arrayC2);
+        let mat64FC3 = cv.matFromArray(2, 2, cv.CV_64FC3, arrayC3);
+        let mat64FC4 = cv.matFromArray(2, 2, cv.CV_64FC4, arrayC4);
+
+        assert.deepEqual(mat8UC1.data, new Uint8Array(arrayC1));
+        assert.deepEqual(mat8UC2.data, new Uint8Array(arrayC2));
+        assert.deepEqual(mat8UC3.data, new Uint8Array(arrayC3));
+        assert.deepEqual(mat8UC4.data, new Uint8Array(arrayC4));
+
+        assert.deepEqual(mat8SC1.data8S, new Int8Array(arrayC1));
+        assert.deepEqual(mat8SC2.data8S, new Int8Array(arrayC2));
+        assert.deepEqual(mat8SC3.data8S, new Int8Array(arrayC3));
+        assert.deepEqual(mat8SC4.data8S, new Int8Array(arrayC4));
+
+        assert.deepEqual(mat16UC1.data16U, new Uint16Array(arrayC1));
+        assert.deepEqual(mat16UC2.data16U, new Uint16Array(arrayC2));
+        assert.deepEqual(mat16UC3.data16U, new Uint16Array(arrayC3));
+        assert.deepEqual(mat16UC4.data16U, new Uint16Array(arrayC4));
+
+        assert.deepEqual(mat16SC1.data16S, new Int16Array(arrayC1));
+        assert.deepEqual(mat16SC2.data16S, new Int16Array(arrayC2));
+        assert.deepEqual(mat16SC3.data16S, new Int16Array(arrayC3));
+        assert.deepEqual(mat16SC4.data16S, new Int16Array(arrayC4));
+
+        assert.deepEqual(mat32SC1.data32S, new Int32Array(arrayC1));
+        assert.deepEqual(mat32SC2.data32S, new Int32Array(arrayC2));
+        assert.deepEqual(mat32SC3.data32S, new Int32Array(arrayC3));
+        assert.deepEqual(mat32SC4.data32S, new Int32Array(arrayC4));
+
+        assert.deepEqual(mat32FC1.data32F, new Float32Array(arrayC1));
+        assert.deepEqual(mat32FC2.data32F, new Float32Array(arrayC2));
+        assert.deepEqual(mat32FC3.data32F, new Float32Array(arrayC3));
+        assert.deepEqual(mat32FC4.data32F, new Float32Array(arrayC4));
+
+        assert.deepEqual(mat64FC1.data64F, new Float64Array(arrayC1));
+        assert.deepEqual(mat64FC2.data64F, new Float64Array(arrayC2));
+        assert.deepEqual(mat64FC3.data64F, new Float64Array(arrayC3));
+        assert.deepEqual(mat64FC4.data64F, new Float64Array(arrayC4));
+
+        mat8UC1.delete();
+        mat8UC2.delete();
+        mat8UC3.delete();
+        mat8UC4.delete();
+        mat8SC1.delete();
+        mat8SC2.delete();
+        mat8SC3.delete();
+        mat8SC4.delete();
+        mat16UC1.delete();
+        mat16UC2.delete();
+        mat16UC3.delete();
+        mat16UC4.delete();
+        mat16SC1.delete();
+        mat16SC2.delete();
+        mat16SC3.delete();
+        mat16SC4.delete();
+        mat32SC1.delete();
+        mat32SC2.delete();
+        mat32SC3.delete();
+        mat32SC4.delete();
+        mat32FC1.delete();
+        mat32FC2.delete();
+        mat32FC3.delete();
+        mat32FC4.delete();
+        mat64FC1.delete();
+        mat64FC2.delete();
+        mat64FC3.delete();
+        mat64FC4.delete();
+    }
+
+    // matFromImageData
+    // Only run in a browser: Node.js has no canvas, and an early return here
+    // would also skip the remaining blocks of this test.
+    if (typeof window !== 'undefined') {
+        let canvas = window.document.createElement('canvas');
+        canvas.width = 2;
+        canvas.height = 2;
+        let ctx = canvas.getContext('2d');
+        ctx.fillStyle='#FF0000';
+        ctx.fillRect(0, 0, 1, 1);
+        ctx.fillRect(1, 1, 1, 1);
+
+        let imageData = ctx.getImageData(0, 0, 2, 2);
+        let mat = cv.matFromImageData(imageData);
+
+        assert.deepEqual(mat.data, new Uint8Array(imageData.data));
+
+        mat.delete();
+    }
+
+    // Mat(mat)
+    {
+        let mat = new cv.Mat(2, 2, cv.CV_8UC4, new cv.Scalar(1, 0, 1, 0));
+        let mat1 = new cv.Mat(mat);
+        let mat2 = mat;
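+        // mat1 is a new embind object sharing data with mat, while mat2 is the
+        // very same JS handle, so deleting mat also marks mat2 as deleted.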
+
+        assert.equal(mat.rows, mat1.rows);
+        assert.equal(mat.cols, mat1.cols);
+        assert.equal(mat.type(), mat1.type());
+        assert.deepEqual(mat.data, mat1.data);
+
+        mat.delete();
+
+        assert.equal(mat1.isDeleted(), false);
+        assert.equal(mat2.isDeleted(), true);
+
+        mat1.delete();
+    }
+
+    // mat.setTo
+    {
+        let mat = new cv.Mat(2, 2, cv.CV_8UC4);
+        let s = [0, 1, 2, 3];
+
+        mat.setTo(s);
+
+        assert.deepEqual(mat.ptr(0, 0), new Uint8Array(s));
+        assert.deepEqual(mat.ptr(0, 1), new Uint8Array(s));
+        assert.deepEqual(mat.ptr(1, 0), new Uint8Array(s));
+        assert.deepEqual(mat.ptr(1, 1), new Uint8Array(s));
+
+        let s1 = [0, 0, 0, 0];
+        mat.setTo(s1);
+        let mask = cv.matFromArray(2, 2, cv.CV_8UC1, [0, 1, 0, 1]);
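+        // With a mask, setTo writes only the elements where the mask is non-zero.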
+        mat.setTo(s, mask);
+
+        assert.deepEqual(mat.ptr(0, 0), new Uint8Array(s1));
+        assert.deepEqual(mat.ptr(0, 1), new Uint8Array(s));
+        assert.deepEqual(mat.ptr(1, 0), new Uint8Array(s1));
+        assert.deepEqual(mat.ptr(1, 1), new Uint8Array(s));
+
+        mat.delete();
+        mask.delete();
+    }
+});
+
+QUnit.test('test_mat_ptr', function(assert) {
+    const RValue = 3;
+    const GValue = 7;
+    const BValue = 197;
+
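+    // mat.data exposes the pixel buffer as a flat, row-major Uint8Array view;
+    // for CV_8UC1 the element at (row, col) lives at index row * cols + col.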
+    // cv.CV_8UC1 + Mat::ptr(int).
+    {
+        let mat = new cv.Mat(10, 10, cv.CV_8UC1);
+        let view = mat.data;
+
+        // Alter matrix[2, 1].
+        let step = 10;
+        view[2 * step + 1] = RValue;
+
+        // Access matrix[2, 1].
+        view = mat.ptr(2);
+
+        assert.equal(view[1], RValue);
+
+        mat.delete();
+    }
+
+    // cv.CV_8UC3 + Mat::ptr(int).
+    {
+        let mat = new cv.Mat(10, 10, cv.CV_8UC3);
+        let view = mat.data;
+
+        // Alter matrix[2, 1].
+        let step = 3 * 10;
+        view[2 * step + 3] = RValue;
+        view[2 * step + 3 + 1] = GValue;
+        view[2 * step + 3 + 2] = BValue;
+
+        // Access matrix[2, 1].
+        view = mat.ptr(2);
+
+        assert.equal(view[3], RValue);
+        assert.equal(view[3 + 1], GValue);
+        assert.equal(view[3 + 2], BValue);
+
+        mat.delete();
+    }
+
+    // cv.CV_8UC3 + Mat::ptr(int, int).
+    {
+        let mat = new cv.Mat(10, 10, cv.CV_8UC3);
+        let view = mat.data;
+
+        // Alter matrix[2, 1].
+        let step = 3 * 10;
+        view[2 * step + 3] = RValue;
+        view[2 * step + 3 + 1] = GValue;
+        view[2 * step + 3 + 2] = BValue;
+
+        // Access matrix[2, 1].
+        view = mat.ptr(2, 1);
+
+        assert.equal(view[0], RValue);
+        assert.equal(view[1], GValue);
+        assert.equal(view[2], BValue);
+
+        mat.delete();
+    }
+
+    const RValueF32 = 3.3;
+    const GValueF32 = 7.3;
+    const BValueF32 = 197.3;
+    const EPSILON = 0.001;
+
+    // cv.CV_32FC1 + Mat::ptr(int).
+    {
+        let mat = new cv.Mat(10, 10, cv.CV_32FC1);
+        let view = mat.data32F;
+
+        // Alter matrix[2, 1].
+        let step = 10;
+        view[2 * step + 1] = RValueF32;
+
+        // Access matrix[2, 1].
+        view = mat.floatPtr(2);
+
+        assert.ok(Math.abs(view[1] - RValueF32) < EPSILON);
+
+        mat.delete();
+    }
+
+    // cv.CV_32FC3 + Mat::ptr(int).
+    {
+        let mat = new cv.Mat(10, 10, cv.CV_32FC3);
+        let view = mat.data32F;
+
+        // Alter matrix[2, 1].
+        let step = mat.step1(0);
+        view[2 * step + 3] = RValueF32;
+        view[2 * step + 3 + 1] = GValueF32;
+        view[2 * step + 3 + 2] = BValueF32;
+
+        // Access matrix[2, 1].
+        view = mat.floatPtr(2);
+
+        assert.ok(Math.abs(view[3] - RValueF32) < EPSILON);
+        assert.ok(Math.abs(view[3 + 1] - GValueF32) < EPSILON);
+        assert.ok(Math.abs(view[3 + 2] - BValueF32) < EPSILON);
+
+        mat.delete();
+    }
+
+    // cv.CV_32FC3 + Mat::ptr(int, int).
+    {
+        let mat = new cv.Mat(10, 10, cv.CV_32FC3);
+        let view = mat.data32F;
+
+        // Alter matrix[2, 1].
+        let step = mat.step1(0);
+        view[2 * step + 3] = RValueF32;
+        view[2 * step + 3 + 1] = GValueF32;
+        view[2 * step + 3 + 2] = BValueF32;
+
+        // Access matrix[2, 1].
+        view = mat.floatPtr(2, 1);
+
+        assert.ok(Math.abs(view[0] - RValueF32) < EPSILON);
+        assert.ok(Math.abs(view[1] - GValueF32) < EPSILON);
+        assert.ok(Math.abs(view[2] - BValueF32) < EPSILON);
+
+        mat.delete();
+    }
+});
+
+QUnit.test('test_mat_zeros', function(assert) {
+    let zeros = new Uint8Array(10*10).fill(0);
+    // Mat::zeros(int, int, int)
+    {
+        let mat = cv.Mat.zeros(10, 10, cv.CV_8UC1);
+        let view = mat.data;
+
+        assert.deepEqual(view, zeros);
+
+        mat.delete();
+    }
+
+    // Mat::zeros(Size, int)
+    {
+        let mat = cv.Mat.zeros({height: 10, width: 10}, cv.CV_8UC1);
+        let view = mat.data;
+
+        assert.deepEqual(view, zeros);
+
+        mat.delete();
+    }
+});
+
+QUnit.test('test_mat_ones', function(assert) {
+    let ones = new Uint8Array(10*10).fill(1);
+    // Mat::ones(int, int, int)
+    {
+        let mat = cv.Mat.ones(10, 10, cv.CV_8UC1);
+        let view = mat.data;
+
+        assert.deepEqual(view, ones);
+
+        mat.delete();
+    }
+    // Mat::ones(Size, int)
+    {
+        let mat = cv.Mat.ones({height: 10, width: 10}, cv.CV_8UC1);
+        let view = mat.data;
+
+        assert.deepEqual(view, ones);
+
+        mat.delete();
+    }
+});
+
+QUnit.test('test_mat_eye', function(assert) {
+    let eye4by4 = new Uint8Array([1, 0, 0, 0,
+                                  0, 1, 0, 0,
+                                  0, 0, 1, 0,
+                                  0, 0, 0, 1]);
+    // Mat::eye(int, int, int)
+    {
+        let mat = cv.Mat.eye(4, 4, cv.CV_8UC1);
+        let view = mat.data;
+
+        assert.deepEqual(view, eye4by4);
+
+        mat.delete();
+    }
+
+    // Mat::eye(Size, int)
+    {
+        let mat = cv.Mat.eye({height: 4, width: 4}, cv.CV_8UC1);
+        let view = mat.data;
+
+        assert.deepEqual(view, eye4by4);
+
+        mat.delete();
+    }
+});
+
+QUnit.test('test_mat_miscs', function(assert) {
+    // Mat::col(int)
+    {
+        let mat = cv.matFromArray(2, 2, cv.CV_8UC2, [1, 2, 3, 4, 5, 6, 7, 8]);
+        let col = mat.col(1);
+
+        assert.equal(col.isContinuous(), false);
+        assert.equal(col.ptr(0, 0)[0], 3);
+        assert.equal(col.ptr(0, 0)[1], 4);
+        assert.equal(col.ptr(1, 0)[0], 7);
+        assert.equal(col.ptr(1, 0)[1], 8);
+
+        col.delete();
+        mat.delete();
+    }
+
+    // Mat::row(int)
+    {
+        let mat = cv.Mat.zeros(5, 5, cv.CV_8UC2);
+        let row = mat.row(1);
+        let view = row.data;
+        assert.equal(view[0], 0);
+        assert.equal(view[4], 0);
+
+        row.delete();
+        mat.delete();
+    }
+
+    // Mat::convertTo(Mat, int, double, double)
+    {
+        let mat = cv.Mat.ones(5, 5, cv.CV_8UC3);
+        let grayMat = cv.Mat.zeros(5, 5, cv.CV_8UC1);
+
+        mat.convertTo(grayMat, cv.CV_8U, 2, 1);
+        // dest = 2 * source(x, y) + 1.
+        let view = grayMat.data;
+        assert.equal(view[0], (1 * 2) + 1);
+
+        mat.convertTo(grayMat, cv.CV_8U);
+        // dest = 1 * source(x, y) + 0.
+        assert.equal(view[0], 1);
+
+        mat.convertTo(grayMat, cv.CV_8U, 2);
+        // dest = 2 * source(x, y) + 0.
+        assert.equal(view[0], 2);
+
+        grayMat.delete();
+        mat.delete();
+    }
+
+    // split
+    {
+        const R = 7;
+        const G = 13;
+        const B = 29;
+
+        let mat = cv.Mat.ones(5, 5, cv.CV_8UC3);
+        let view = mat.data;
+        view[0] = R;
+        view[1] = G;
+        view[2] = B;
+
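+        // cv.split separates the interleaved channels into one
+        // single-channel Mat per channel.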
+        let bgrPlanes = new cv.MatVector();
+        cv.split(mat, bgrPlanes);
+        assert.equal(bgrPlanes.size(), 3);
+
+        let rMat = bgrPlanes.get(0);
+        view = rMat.data;
+        assert.equal(view[0], R);
+
+        let gMat = bgrPlanes.get(1);
+        view = gMat.data;
+        assert.equal(view[0], G);
+
+        let bMat = bgrPlanes.get(2);
+        view = bMat.data;
+        assert.equal(view[0], B);
+
+        mat.delete();
+        rMat.delete();
+        gMat.delete();
+        bgrPlanes.delete();
+        bMat.delete();
+    }
+
+    // elemSize
+    {
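+        // elemSize() is the size of one element in bytes (all channels);
+        // elemSize1() is the size of a single channel value.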
+        let mat = cv.Mat.ones(5, 5, cv.CV_8UC3);
+        assert.equal(mat.elemSize(), 3);
+        assert.equal(mat.elemSize1(), 1);
+
+        let mat2 = cv.Mat.zeros(5, 5, cv.CV_8UC1);
+        assert.equal(mat2.elemSize(), 1);
+        assert.equal(mat2.elemSize1(), 1);
+
+        let mat3 = cv.Mat.eye(5, 5, cv.CV_16UC3);
+        assert.equal(mat3.elemSize(), 2 * 3);
+        assert.equal(mat3.elemSize1(), 2);
+
+        mat.delete();
+        mat2.delete();
+        mat3.delete();
+    }
+
+    // step
+    {
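+        // step[0] is the row stride in bytes (cols * elemSize);
+        // step[1] is the per-element stride (elemSize).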
+        let mat = cv.Mat.ones(5, 5, cv.CV_8UC3);
+        assert.equal(mat.step[0], 15);
+        assert.equal(mat.step[1], 3);
+
+        let mat2 = cv.Mat.zeros(5, 5, cv.CV_8UC1);
+        assert.equal(mat2.step[0], 5);
+        assert.equal(mat2.step[1], 1);
+
+        let mat3 = cv.Mat.eye(5, 5, cv.CV_16UC3);
+        assert.equal(mat3.step[0], 30);
+        assert.equal(mat3.step[1], 6);
+
+        mat.delete();
+        mat2.delete();
+        mat3.delete();
+    }
+
+    // dot
+    {
+        let mat = cv.Mat.ones(5, 5, cv.CV_8UC1);
+        let mat2 = cv.Mat.eye(5, 5, cv.CV_8UC1);
+
+        assert.equal(mat.dot(mat), 25);
+        assert.equal(mat.dot(mat2), 5);
+        assert.equal(mat2.dot(mat2), 5);
+
+        mat.delete();
+        mat2.delete();
+    }
+
+    // mul
+    {
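+        // Mat::mul is element-wise: result(i,j) = scale * a(i,j) * b(i,j).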
+        const FACTOR = 5;
+        let mat = cv.Mat.ones(4, 4, cv.CV_8UC1);
+        let mat2 = cv.Mat.eye(4, 4, cv.CV_8UC1);
+
+        let expected = new Uint8Array([FACTOR, 0, 0, 0,
+                                       0, FACTOR, 0, 0,
+                                       0, 0, FACTOR, 0,
+                                       0, 0, 0, FACTOR]);
+        let mat3 = mat.mul(mat2, FACTOR);
+
+        assert.deepEqual(mat3.data, expected);
+
+        mat.delete();
+        mat2.delete();
+        mat3.delete();
+    }
+});
+
+
+QUnit.test('test mat access', function(assert) {
+    // test memory view
+    {
+        let data = new Uint8Array([0, 0, 0, 255, 0, 1, 2, 3]);
+        let dataPtr = cv._malloc(8);
+
+        let dataHeap = new Uint8Array(cv.HEAPU8.buffer, dataPtr, 8);
+        dataHeap.set(new Uint8Array(data.buffer));
+
+        let mat = new cv.Mat(8, 1, cv.CV_8UC1, dataPtr, 0);
+
+
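+        // A Mat exposes several typed-array views (data, data8S, data16U, ...)
+        // over the same underlying bytes; they differ only in reinterpretation.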
+        let unsignedCharView = new Uint8Array(data.buffer);
+        let charView = new Int8Array(data.buffer);
+        let shortView = new Int16Array(data.buffer);
+        let unsignedShortView = new Uint16Array(data.buffer);
+        let intView = new Int32Array(data.buffer);
+        let float32View = new Float32Array(data.buffer);
+        let float64View = new Float64Array(data.buffer);
+
+
+        assert.deepEqual(unsignedCharView, mat.data);
+        assert.deepEqual(charView, mat.data8S);
+        assert.deepEqual(shortView, mat.data16S);
+        assert.deepEqual(unsignedShortView, mat.data16U);
+        assert.deepEqual(intView, mat.data32S);
+        assert.deepEqual(float32View, mat.data32F);
+        assert.deepEqual(float64View, mat.data64F);
+
+        mat.delete();
+    }
+
+    // test ucharAt(i)
+    {
+        let data = new Uint8Array([0, 0, 0, 255, 0, 1, 2, 3]);
+        let dataPtr = cv._malloc(8);
+
+        let dataHeap = new Uint8Array(cv.HEAPU8.buffer, dataPtr, 8);
+        dataHeap.set(new Uint8Array(data.buffer));
+
+        let mat = new cv.Mat(8, 1, cv.CV_8UC1, dataPtr, 0);
+
+        assert.equal(mat.ucharAt(0), 0);
+        assert.equal(mat.ucharAt(1), 0);
+        assert.equal(mat.ucharAt(2), 0);
+        assert.equal(mat.ucharAt(3), 255);
+        assert.equal(mat.ucharAt(4), 0);
+        assert.equal(mat.ucharAt(5), 1);
+        assert.equal(mat.ucharAt(6), 2);
+        assert.equal(mat.ucharAt(7), 3);
+
+        mat.delete();
+    }
+
+    // test ushortAt(i)
+    {
+        let data = new Uint16Array([0, 1000, 65000, 255, 0, 1, 2, 3]);
+        let dataPtr = cv._malloc(16);
+
+        let dataHeap = new Uint16Array(cv.HEAPU8.buffer, dataPtr, 8);
+        dataHeap.set(new Uint16Array(data.buffer));
+
+        let mat = new cv.Mat(8, 1, cv.CV_16UC1, dataPtr, 0);
+
+        assert.equal(mat.ushortAt(0), 0);
+        assert.equal(mat.ushortAt(1), 1000);
+        assert.equal(mat.ushortAt(2), 65000);
+        assert.equal(mat.ushortAt(3), 255);
+        assert.equal(mat.ushortAt(4), 0);
+        assert.equal(mat.ushortAt(5), 1);
+        assert.equal(mat.ushortAt(6), 2);
+        assert.equal(mat.ushortAt(7), 3);
+
+        mat.delete();
+    }
+
+    // test intAt(i)
+    {
+        let data = new Int32Array([0, -1000, 65000, 255, -2000000, -1, 2, 3]);
+        let dataPtr = cv._malloc(32);
+
+        let dataHeap = new Int32Array(cv.HEAPU32.buffer, dataPtr, 8);
+        dataHeap.set(new Int32Array(data.buffer));
+
+        let mat = new cv.Mat(8, 1, cv.CV_32SC1, dataPtr, 0);
+
+        assert.equal(mat.intAt(0), 0);
+        assert.equal(mat.intAt(1), -1000);
+        assert.equal(mat.intAt(2), 65000);
+        assert.equal(mat.intAt(3), 255);
+        assert.equal(mat.intAt(4), -2000000);
+        assert.equal(mat.intAt(5), -1);
+        assert.equal(mat.intAt(6), 2);
+        assert.equal(mat.intAt(7), 3);
+
+        mat.delete();
+    }
+
+    // test floatAt(i)
+    {
+        const EPSILON = 0.001;
+        let data = new Float32Array([0, -10.5, 650.001, 255, -20.1, -1.2, 2, 3.5]);
+        let dataPtr = cv._malloc(32);
+
+        let dataHeap = new Float32Array(cv.HEAPU32.buffer, dataPtr, 8);
+        dataHeap.set(new Float32Array(data.buffer));
+
+        let mat = new cv.Mat(8, 1, cv.CV_32FC1, dataPtr, 0);
+
+        assert.equal(Math.abs(mat.floatAt(0)-0) < EPSILON, true);
+        assert.equal(Math.abs(mat.floatAt(1)+10.5) < EPSILON, true);
+        assert.equal(Math.abs(mat.floatAt(2)-650.001) < EPSILON, true);
+        assert.equal(Math.abs(mat.floatAt(3)-255) < EPSILON, true);
+        assert.equal(Math.abs(mat.floatAt(4)+20.1) < EPSILON, true);
+        assert.equal(Math.abs(mat.floatAt(5)+1.2) < EPSILON, true);
+        assert.equal(Math.abs(mat.floatAt(6)-2) < EPSILON, true);
+        assert.equal(Math.abs(mat.floatAt(7)-3.5) < EPSILON, true);
+
+        mat.delete();
+    }
+
+    // test intAt(i,j)
+    {
+        let mat = cv.Mat.eye({height: 3, width: 3}, cv.CV_32SC1);
+
+        assert.equal(mat.intAt(0, 0), 1);
+        assert.equal(mat.intAt(0, 1), 0);
+        assert.equal(mat.intAt(0, 2), 0);
+        assert.equal(mat.intAt(1, 0), 0);
+        assert.equal(mat.intAt(1, 1), 1);
+        assert.equal(mat.intAt(1, 2), 0);
+        assert.equal(mat.intAt(2, 0), 0);
+        assert.equal(mat.intAt(2, 1), 0);
+        assert.equal(mat.intAt(2, 2), 1);
+
+        mat.delete();
+    }
+});
+
+QUnit.test('test_mat_operations', function(assert) {
+    // test minMaxLoc
+    {
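+        // cv.minMaxLoc returns {minVal, maxVal, minLoc, maxLoc};
+        // locations are {x, y} objects with x being the column.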
+        let src = cv.Mat.ones(4, 4, cv.CV_8UC1);
+
+        src.data[2] = 0;
+        src.data[5] = 2;
+
+        let result = cv.minMaxLoc(src);
+
+        assert.equal(result.minVal, 0);
+        assert.equal(result.maxVal, 2);
+        assert.deepEqual(result.minLoc, {x: 2, y: 0});
+        assert.deepEqual(result.maxLoc, {x: 1, y: 1});
+
+        src.delete();
+    }
+});
+
+QUnit.test('test_mat_roi', function(assert) {
+    // test roi
+    {
+        let mat = cv.matFromArray(2, 2, cv.CV_8UC1, [0, 1, 2, 3]);
+        let roi = mat.roi(new cv.Rect(1, 1, 1, 1));
+
+        assert.equal(roi.rows, 1);
+        assert.equal(roi.cols, 1);
+        assert.deepEqual(roi.data, new Uint8Array([mat.ucharAt(1, 1)]));
+
+        mat.delete();
+        roi.delete();
+    }
+});
+
+
+QUnit.test('test_mat_range', function(assert) {
+    {
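+        // colRange/rowRange take [start, end) with an exclusive end index and
+        // return views that share storage with the source Mat.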
+        let src = cv.matFromArray(2, 2, cv.CV_8UC1, [0, 1, 2, 3]);
+        let mat = src.colRange(0, 1);
+
+        assert.equal(mat.isContinuous(), false);
+        assert.equal(mat.rows, 2);
+        assert.equal(mat.cols, 1);
+        assert.equal(mat.ucharAt(0), 0);
+        assert.equal(mat.ucharAt(1), 2);
+
+        mat.delete();
+
+        mat = src.colRange({start: 0, end: 1});
+
+        assert.equal(mat.isContinuous(), false);
+        assert.equal(mat.rows, 2);
+        assert.equal(mat.cols, 1);
+        assert.equal(mat.ucharAt(0), 0);
+        assert.equal(mat.ucharAt(1), 2);
+
+        mat.delete();
+
+        mat = src.rowRange(1, 2);
+
+        assert.equal(mat.rows, 1);
+        assert.equal(mat.cols, 2);
+        assert.deepEqual(mat.data, new Uint8Array([2, 3]));
+
+        mat.delete();
+
+        mat = src.rowRange({start: 1, end: 2});
+
+        assert.equal(mat.rows, 1);
+        assert.equal(mat.cols, 2);
+        assert.deepEqual(mat.data, new Uint8Array([2, 3]));
+
+        mat.delete();
+
+        src.delete();
+    }
+});
+
+QUnit.test('test_mat_diag', function(assert) {
+    // test diag
+    {
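+        // Mat::diag(d) returns a single-column view of the d-th diagonal:
+        // d = 0 is the main diagonal, d > 0 above it, d < 0 below it.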
+        let mat = cv.matFromArray(3, 3, cv.CV_8UC1, [0, 1, 2, 3, 4, 5, 6, 7, 8]);
+        let d = mat.diag();
+        let d1 = mat.diag(1);
+        let d2 = mat.diag(-1);
+
+        assert.equal(mat.isContinuous(), true);
+        assert.equal(d.isContinuous(), false);
+        assert.equal(d1.isContinuous(), false);
+        assert.equal(d2.isContinuous(), false);
+
+        assert.equal(d.ucharAt(0), 0);
+        assert.equal(d.ucharAt(1), 4);
+        assert.equal(d.ucharAt(2), 8);
+
+        assert.equal(d1.ucharAt(0), 1);
+        assert.equal(d1.ucharAt(1), 5);
+
+        assert.equal(d2.ucharAt(0), 3);
+        assert.equal(d2.ucharAt(1), 7);
+
+        mat.delete();
+        d.delete();
+        d1.delete();
+        d2.delete();
+    }
+});
diff --git a/modules/js/test/test_objdetect.js b/modules/js/test/test_objdetect.js
new file mode 100644 (file)
index 0000000..76f0a77
--- /dev/null
@@ -0,0 +1,161 @@
+// //////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//
+
+// //////////////////////////////////////////////////////////////////////////////////////
+// Author: Sajjad Taheri, University of California, Irvine. sajjadt[at]uci[dot]edu
+//
+//                             LICENSE AGREEMENT
+// Copyright (c) 2015 The Regents of the University of California (Regents)
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+// 3. Neither the name of the University nor the
+//    names of its contributors may be used to endorse or promote products
+//    derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+if (typeof module !== 'undefined' && module.exports) {
+    // The environment is Node.js
+    var cv = require('./opencv.js'); // eslint-disable-line no-var
+    cv.FS_createLazyFile('/', 'haarcascade_frontalface_default.xml', // eslint-disable-line new-cap
+                         'haarcascade_frontalface_default.xml', true, false);
+}
+
+QUnit.module('Object Detection', {});
+QUnit.test('Cascade classification', function(assert) {
+    // Group rectangle
+    {
+        let rectList = new cv.RectVector();
+        let weights = new cv.IntVector();
+        let groupThreshold = 1;
+        const eps = 0.2;
+
+        let rect1 = new cv.Rect(1, 2, 3, 4);
+        let rect2 = new cv.Rect(1, 4, 2, 3);
+
+        rectList.push_back(rect1);
+        rectList.push_back(rect2);
+
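+        // cv.groupRectangles clusters similar rectangles in rectList in place;
+        // this is a smoke test for the binding, with no assertions on the result.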
+        cv.groupRectangles(rectList, weights, groupThreshold, eps);
+
+
+        rectList.delete();
+        weights.delete();
+    }
+
+    // CascadeClassifier
+    {
+        let classifier = new cv.CascadeClassifier();
+        const modelPath = '/haarcascade_frontalface_default.xml';
+
+        assert.equal(classifier.empty(), true);
+
+
+        classifier.load(modelPath);
+        assert.equal(classifier.empty(), false);
+
+        let image = cv.Mat.eye({height: 10, width: 10}, cv.CV_8UC3);
+        let objects = new cv.RectVector();
+        let numDetections = new cv.IntVector();
+        const scaleFactor = 1.1;
+        const minNeighbors = 3;
+        const flags = 0;
+        const minSize = {height: 0, width: 0};
+        const maxSize = {height: 10, width: 10};
+
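+        // detectMultiScale2 reports each detection's rectangle in `objects` and
+        // the number of merged neighbours in `numDetections`.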
+        classifier.detectMultiScale2(image, objects, numDetections, scaleFactor,
+                                     minNeighbors, flags, minSize, maxSize);
+
+        // test default parameters
+        classifier.detectMultiScale2(image, objects, numDetections, scaleFactor,
+                                     minNeighbors, flags, minSize);
+        classifier.detectMultiScale2(image, objects, numDetections, scaleFactor,
+                                     minNeighbors, flags);
+        classifier.detectMultiScale2(image, objects, numDetections, scaleFactor,
+                                     minNeighbors);
+        classifier.detectMultiScale2(image, objects, numDetections, scaleFactor);
+
+        classifier.delete();
+        objects.delete();
+        numDetections.delete();
+    }
+
+    // HOGDescriptor
+    {
+        let hog = new cv.HOGDescriptor();
+        let mat = new cv.Mat({height: 10, width: 10}, cv.CV_8UC1);
+        let descriptors = new cv.FloatVector();
+        let locations = new cv.PointVector();
+
+
+        assert.equal(hog.winSize.height, 128);
+        assert.equal(hog.winSize.width, 64);
+        assert.equal(hog.nbins, 9);
+        assert.equal(hog.derivAperture, 1);
+        assert.equal(hog.winSigma, -1);
+        assert.equal(hog.histogramNormType, 0);
+        assert.equal(hog.nlevels, 64);
+
+        hog.nlevels = 32;
+        assert.equal(hog.nlevels, 32);
+
+        hog.delete();
+        mat.delete();
+        descriptors.delete();
+        locations.delete();
+    }
+});
diff --git a/modules/js/test/test_utils.js b/modules/js/test/test_utils.js
new file mode 100644 (file)
index 0000000..0f345b4
--- /dev/null
@@ -0,0 +1,253 @@
+//  //////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+
+// //////////////////////////////////////////////////////////////////////////////////////
+// Author: Sajjad Taheri, University of California, Irvine. sajjadt[at]uci[dot]edu
+//
+//                             LICENSE AGREEMENT
+// Copyright (c) 2015 The Regents of the University of California (Regents)
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+// 3. Neither the name of the University nor the
+//    names of its contributors may be used to endorse or promote products
+//    derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+if (typeof module !== 'undefined' && module.exports) {
+    // The environment is Node.js
+    var cv = require('./opencv.js'); // eslint-disable-line no-var
+}
+QUnit.module('Utils', {});
+QUnit.test('Test vectors', function(assert) {
+    {
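+        // cv.PointVector wraps std::vector<cv::Point>; push_back also accepts
+        // plain {x, y} objects.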
+        let pointVector = new cv.PointVector();
+        for (let i=0; i<100; ++i) {
+            pointVector.push_back({x: i, y: 2*i});
+        }
+
+        assert.equal(pointVector.size(), 100);
+
+        let index = 10;
+        let item = pointVector.get(index);
+        assert.equal(item.x, index);
+        assert.equal(item.y, 2*index);
+
+        index = 0;
+        item = pointVector.get(index);
+        assert.equal(item.x, index);
+        assert.equal(item.y, 2*index);
+
+        index = 99;
+        item = pointVector.get(index);
+        assert.equal(item.x, index);
+        assert.equal(item.y, 2*index);
+
+        pointVector.delete();
+    }
+
+    {
+        let pointVector = new cv.PointVector();
+        for (let i=0; i<100; ++i) {
+            pointVector.push_back(new cv.Point(i, 2*i));
+        }
+
+        pointVector.push_back(new cv.Point());
+
+        assert.equal(pointVector.size(), 101);
+
+        let index = 10;
+        let item = pointVector.get(index);
+        assert.equal(item.x, index);
+        assert.equal(item.y, 2*index);
+
+        index = 0;
+        item = pointVector.get(index);
+        assert.equal(item.x, index);
+        assert.equal(item.y, 2*index);
+
+        index = 99;
+        item = pointVector.get(index);
+        assert.equal(item.x, index);
+        assert.equal(item.y, 2*index);
+
+        index = 100;
+        item = pointVector.get(index);
+        assert.equal(item.x, 0);
+        assert.equal(item.y, 0);
+
+        pointVector.delete();
+    }
+});
+QUnit.test('Test Rect', function(assert) {
+    let rectVector = new cv.RectVector();
+    let rect = {x: 1, y: 2, width: 3, height: 4};
+    rectVector.push_back(rect);
+    rectVector.push_back(new cv.Rect());
+    rectVector.push_back(new cv.Rect(rect));
+    rectVector.push_back(new cv.Rect({x: 5, y: 6}, {width: 7, height: 8}));
+    rectVector.push_back(new cv.Rect(9, 10, 11, 12));
+
+    assert.equal(rectVector.size(), 5);
+
+    let item = rectVector.get(0);
+    assert.equal(item.x, 1);
+    assert.equal(item.y, 2);
+    assert.equal(item.width, 3);
+    assert.equal(item.height, 4);
+
+    item = rectVector.get(1);
+    assert.equal(item.x, 0);
+    assert.equal(item.y, 0);
+    assert.equal(item.width, 0);
+    assert.equal(item.height, 0);
+
+    item = rectVector.get(2);
+    assert.equal(item.x, 1);
+    assert.equal(item.y, 2);
+    assert.equal(item.width, 3);
+    assert.equal(item.height, 4);
+
+    item = rectVector.get(3);
+    assert.equal(item.x, 5);
+    assert.equal(item.y, 6);
+    assert.equal(item.width, 7);
+    assert.equal(item.height, 8);
+
+    item = rectVector.get(4);
+    assert.equal(item.x, 9);
+    assert.equal(item.y, 10);
+    assert.equal(item.width, 11);
+    assert.equal(item.height, 12);
+
+    rectVector.delete();
+});
+QUnit.test('Test Size', function(assert) {
+    {
+        let mat = new cv.Mat();
+        mat.create({width: 5, height: 10}, cv.CV_8UC4);
+        let size = mat.size();
+
+        assert.ok(mat.type() === cv.CV_8UC4);
+        assert.ok(size.height === 10);
+        assert.ok(size.width === 5);
+        assert.ok(mat.channels() === 4);
+
+        mat.delete();
+    }
+
+    {
+        let mat = new cv.Mat();
+        mat.create(new cv.Size(5, 10), cv.CV_8UC4);
+        let size = mat.size();
+
+        assert.ok(mat.type() === cv.CV_8UC4);
+        assert.ok(size.height === 10);
+        assert.ok(size.width === 5);
+        assert.ok(mat.channels() === 4);
+
+        mat.delete();
+    }
+});
+
+
+QUnit.test('test_rotated_rect', function(assert) {
+    {
+        let rect = {center: {x: 100, y: 100}, size: {height: 100, width: 50}, angle: 30};
+
+        assert.equal(rect.center.x, 100);
+        assert.equal(rect.center.y, 100);
+        assert.equal(rect.angle, 30);
+        assert.equal(rect.size.height, 100);
+        assert.equal(rect.size.width, 50);
+    }
+
+    {
+        let rect = new cv.RotatedRect();
+
+        assert.equal(rect.center.x, 0);
+        assert.equal(rect.center.y, 0);
+        assert.equal(rect.angle, 0);
+        assert.equal(rect.size.height, 0);
+        assert.equal(rect.size.width, 0);
+
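+        // points() returns the four corner vertices; for the default
+        // (all-zero) rect they all sit at the origin.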
+        let points = cv.RotatedRect.points(rect);
+
+        assert.equal(points[0].x, 0);
+        assert.equal(points[0].y, 0);
+        assert.equal(points[1].x, 0);
+        assert.equal(points[1].y, 0);
+        assert.equal(points[2].x, 0);
+        assert.equal(points[2].y, 0);
+        assert.equal(points[3].x, 0);
+        assert.equal(points[3].y, 0);
+    }
+
+    {
+        let rect = new cv.RotatedRect({x: 100, y: 100}, {height: 100, width: 50}, 30);
+
+        assert.equal(rect.center.x, 100);
+        assert.equal(rect.center.y, 100);
+        assert.equal(rect.angle, 30);
+        assert.equal(rect.size.height, 100);
+        assert.equal(rect.size.width, 50);
+
+        let points = cv.RotatedRect.points(rect);
+
+        assert.equal(points[0].x, cv.RotatedRect.boundingRect2f(rect).x);
+        assert.equal(points[1].y, cv.RotatedRect.boundingRect2f(rect).y);
+    }
+});
diff --git a/modules/js/test/test_video.js b/modules/js/test/test_video.js
new file mode 100644 (file)
index 0000000..f26a8b7
--- /dev/null
@@ -0,0 +1,107 @@
+//  //////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+
+// //////////////////////////////////////////////////////////////////////////////////////
+// Author: Sajjad Taheri, University of California, Irvine. sajjadt[at]uci[dot]edu
+//
+//                             LICENSE AGREEMENT
+// Copyright (c) 2015 The Regents of the University of California (Regents)
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+// 3. Neither the name of the University nor the
+//    names of its contributors may be used to endorse or promote products
+//    derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+if (typeof module !== 'undefined' && module.exports) {
+    // The environment is Node.js
+    var cv = require('./opencv.js'); // eslint-disable-line no-var
+}
+
+QUnit.module('Video', {});
+QUnit.test('Background Segmentation', function(assert) {
+    // BackgroundSubtractorMOG2
+    {
+        const history = 600;
+        const varThreshold = 15;
+        const detectShadows = true;
+
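+        // Exercise every constructor arity: (history, varThreshold, detectShadows),
+        // (), (history), and (history, varThreshold).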
+        let mog2 = new cv.BackgroundSubtractorMOG2(history, varThreshold, detectShadows);
+
+        assert.equal(mog2 instanceof cv.BackgroundSubtractorMOG2, true);
+
+        mog2.delete();
+
+        mog2 = new cv.BackgroundSubtractorMOG2();
+
+        assert.equal(mog2 instanceof cv.BackgroundSubtractorMOG2, true);
+
+        mog2.delete();
+
+        mog2 = new cv.BackgroundSubtractorMOG2(history);
+
+        assert.equal(mog2 instanceof cv.BackgroundSubtractorMOG2, true);
+
+        mog2.delete();
+
+        mog2 = new cv.BackgroundSubtractorMOG2(history, varThreshold);
+
+        assert.equal(mog2 instanceof cv.BackgroundSubtractorMOG2, true);
+
+        mog2.delete();
+    }
+});
diff --git a/modules/js/test/tests.html b/modules/js/test/tests.html
new file mode 100644 (file)
index 0000000..2ed5325
--- /dev/null
@@ -0,0 +1,78 @@
+<!DOCTYPE html>
+<html lang="en">
+    <head>
+        <title>OpenCV JS Tests</title>
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, user-scalable=no, minimum-scale=1.0, maximum-scale=1.0">
+        <link rel="stylesheet" href="http://code.jquery.com/qunit/qunit-1.20.0.css" type="text/css" media="screen">
+        <style>
+            body {
+                font-family: Monospace;
+                background-color: #ffffff;
+                margin: 0px;
+            }
+            a {
+                color: #0040ff;
+            }
+        </style>
+    </head>
+    <body>
+
+        <div id="qunit"></div>
+        <div id="qunit-fixture"></div>
+
+        <script src="http://code.jquery.com/qunit/qunit-2.0.1.js"></script>
+        <script type="application/javascript" async src="opencv.js"></script>
+        <script type="application/javascript" src="test_mat.js"></script>
+        <script type="application/javascript" src="test_utils.js"></script>
+        <script type="application/javascript" src="test_imgproc.js"></script>
+        <script type="application/javascript" src="test_objdetect.js"></script>
+        <script type="application/javascript" src="test_video.js"></script>
+        <script type="application/javascript">
+            QUnit.config.autostart = false;
+
+            var Module = {
+                preRun: [function() {
+                    Module.FS_createPreloadedFile('/', 'haarcascade_frontalface_default.xml', 'haarcascade_frontalface_default.xml', true, false);
+                }],
+                postRun: [],
+                onRuntimeInitialized: function() {
+                    console.log("Runtime is ready...");
+                    QUnit.start();
+                },
+                print: (function() {
+                    var element = document.getElementById('output');
+                    if (element) element.value = ''; // clear the output area
+                    return function(text) {
+                        console.log(text);
+                        if (element) {
+                            element.value += text + "\n";
+                            element.scrollTop = element.scrollHeight; // scroll to the bottom
+                        }
+                    };
+                })(),
+                printErr: function(text) {
+                    console.log(text);
+                },
+                setStatus: function(text) {
+                    console.log(text);
+                },
+                totalDependencies: 0
+            };
+
+          Module.setStatus('Downloading...');
+          window.onerror = function(event) {
+            Module.setStatus('Exception thrown, see JavaScript console');
+            Module.setStatus = function(text) {
+              if (text) Module.printErr('[post-exception status] ' + text);
+            };
+          };
+        </script>
+
+<!--
+    TODO
+    <script type="application/javascript" src="test_features2d.js"></script>
+-->
+
+    </body>
+</html>
diff --git a/modules/js/test/tests.js b/modules/js/test/tests.js
new file mode 100644 (file)
index 0000000..cae9bac
--- /dev/null
@@ -0,0 +1,53 @@
+//  //////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+
+let testrunner = require('qunit');
+testrunner.options.maxBlockDuration = 20000; // because opencv.js needs time to load
+
+testrunner.run(
+    {
+        code: 'opencv.js',
+        tests: ['test_mat.js', 'test_utils.js', 'test_imgproc.js',
+                'test_objdetect.js', 'test_video.js'],
+    },
+    function(err, report) {
+        console.log(report.failed + ' failed, ' + report.passed + ' passed');
+    }
+);
index 862d564..4e330af 100644 (file)
@@ -1,2 +1,2 @@
 set(the_description "Object Detection")
-ocv_define_module(objdetect opencv_core opencv_imgproc WRAP java python)
+ocv_define_module(objdetect opencv_core opencv_imgproc WRAP java python js)
index 85fad54..40ebf56 100644 (file)
@@ -352,22 +352,54 @@ struct DetectionROI
    std::vector<double> confidences;
 };
 
-/**@example peopledetect.cpp
+/**@brief Implementation of HOG (Histogram of Oriented Gradients) descriptor and object detector.
+
+The class implements the HOG descriptor algorithm introduced by Navneet Dalal and Bill Triggs @cite Dalal2005 .
+
+Useful links:
+
+https://hal.inria.fr/inria-00548512/document/
+
+https://en.wikipedia.org/wiki/Histogram_of_oriented_gradients
+
+https://software.intel.com/en-us/ipp-dev-reference-histogram-of-oriented-gradients-hog-descriptor
+
+http://www.learnopencv.com/histogram-of-oriented-gradients
+
+http://www.learnopencv.com/handwritten-digits-classification-an-opencv-c-python-tutorial
+
  */
 struct CV_EXPORTS_W HOGDescriptor
 {
 public:
-    enum { L2Hys = 0
+    enum { L2Hys = 0 //!< Default histogramNormType
          };
-    enum { DEFAULT_NLEVELS = 64
+    enum { DEFAULT_NLEVELS = 64 //!< Default nlevels value.
          };
+    /**@brief Creates the HOG descriptor and detector with default params.
 
+    equal to HOGDescriptor(Size(64,128), Size(16,16), Size(8,8), Size(8,8), 9, 1)
+    */
     CV_WRAP HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
         cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
         histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true),
         free_coef(-1.f), nlevels(HOGDescriptor::DEFAULT_NLEVELS), signedGradient(false)
     {}
 
+    /** @overload
+    @param _winSize sets winSize with given value.
+    @param _blockSize sets blockSize with given value.
+    @param _blockStride sets blockStride with given value.
+    @param _cellSize sets cellSize with given value.
+    @param _nbins sets nbins with given value.
+    @param _derivAperture sets derivAperture with given value.
+    @param _winSigma sets winSigma with given value.
+    @param _histogramNormType sets histogramNormType with given value.
+    @param _L2HysThreshold sets L2HysThreshold with given value.
+    @param _gammaCorrection sets gammaCorrection with given value.
+    @param _nlevels sets nlevels with given value.
+    @param _signedGradient sets signedGradient with given value.
+    */
     CV_WRAP HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
                   Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
                   int _histogramNormType=HOGDescriptor::L2Hys,
@@ -379,97 +411,262 @@ public:
     gammaCorrection(_gammaCorrection), free_coef(-1.f), nlevels(_nlevels), signedGradient(_signedGradient)
     {}
 
+    /** @overload
+    @param filename The file name containing HOGDescriptor properties and coefficients of the trained classifier
+    */
     CV_WRAP HOGDescriptor(const String& filename)
     {
         load(filename);
     }
 
+    /** @overload
+    @param d the HOGDescriptor which is cloned to create a new one.
+    */
     HOGDescriptor(const HOGDescriptor& d)
     {
         d.copyTo(*this);
     }
 
+    /**@brief Default destructor.
+    */
     virtual ~HOGDescriptor() {}
 
+    /**@brief Returns the number of coefficients required for the classification.
+    */
     CV_WRAP size_t getDescriptorSize() const;
+
+    /** @brief Checks if detector size equals descriptor size.
+    */
     CV_WRAP bool checkDetectorSize() const;
+
+    /** @brief Returns winSigma value
+    */
     CV_WRAP double getWinSigma() const;
 
+    /**@example peopledetect.cpp
+    */
+    /**@brief Sets coefficients for the linear SVM classifier.
+    @param _svmdetector coefficients for the linear SVM classifier.
+    */
     CV_WRAP virtual void setSVMDetector(InputArray _svmdetector);
 
+    /** @brief Reads HOGDescriptor parameters from a file node.
+    @param fn File node
+    */
     virtual bool read(FileNode& fn);
+
+    /** @brief Stores HOGDescriptor parameters in a file storage.
+    @param fs File storage
+    @param objname Object name
+    */
     virtual void write(FileStorage& fs, const String& objname) const;
 
+    /** @brief loads coefficients for the linear SVM classifier from a file
+    @param filename Name of the file to read.
+    @param objname The optional name of the node to read (if empty, the first top-level node will be used).
+    */
     CV_WRAP virtual bool load(const String& filename, const String& objname = String());
+
+    /** @brief saves coefficients for the linear SVM classifier to a file
+    @param filename File name
+    @param objname Object name
+    */
     CV_WRAP virtual void save(const String& filename, const String& objname = String()) const;
+
+    /** @brief clones the HOGDescriptor
+    @param c cloned HOGDescriptor
+    */
     virtual void copyTo(HOGDescriptor& c) const;
 
+    /**@example train_HOG.cpp
+    */
+    /** @brief Computes HOG descriptors of given image.
+    @param img Matrix of the type CV_8U containing an image where HOG features will be calculated.
+    @param descriptors Matrix of the type CV_32F
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param locations Vector of Point
+    */
     CV_WRAP virtual void compute(InputArray img,
                          CV_OUT std::vector<float>& descriptors,
                          Size winStride = Size(), Size padding = Size(),
                          const std::vector<Point>& locations = std::vector<Point>()) const;
 
-    //! with found weights output
+    /** @brief Performs object detection without a multi-scale window.
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of points where each point is the top-left corner of a detected object boundary.
+    @param weights Vector that will contain confidence values for each detected object.
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane.
+    Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient).
+    But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param searchLocations Vector of points with the set of requested locations to be evaluated.
+    */
     CV_WRAP virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
                         CV_OUT std::vector<double>& weights,
                         double hitThreshold = 0, Size winStride = Size(),
                         Size padding = Size(),
                         const std::vector<Point>& searchLocations = std::vector<Point>()) const;
-    //! without found weights output
+
+    /** @brief Performs object detection without a multi-scale window.
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of points where each point is the top-left corner of a detected object boundary.
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane.
+    Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient).
+    But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param searchLocations Vector of points with locations to search.
+    */
     virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
                         double hitThreshold = 0, Size winStride = Size(),
                         Size padding = Size(),
                         const std::vector<Point>& searchLocations=std::vector<Point>()) const;
 
-    //! with result weights output
+    /** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list
+    of rectangles.
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of rectangles where each rectangle contains the detected object.
+    @param foundWeights Vector that will contain confidence values for each detected object.
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane.
+    Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient).
+    But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param scale Coefficient of the detection window increase.
+    @param finalThreshold Final threshold
+    @param useMeanshiftGrouping Flag to use meanshift grouping instead of the default rectangle grouping.
+    */
     CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
                                   CV_OUT std::vector<double>& foundWeights, double hitThreshold = 0,
                                   Size winStride = Size(), Size padding = Size(), double scale = 1.05,
                                   double finalThreshold = 2.0,bool useMeanshiftGrouping = false) const;
-    //! without found weights output
+
+    /** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list
+    of rectangles.
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of rectangles where each rectangle contains the detected object.
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane.
+    Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient).
+    But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param scale Coefficient of the detection window increase.
+    @param finalThreshold Final threshold
+    @param useMeanshiftGrouping Flag to use meanshift grouping instead of the default rectangle grouping.
+    */
     virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
                                   double hitThreshold = 0, Size winStride = Size(),
                                   Size padding = Size(), double scale = 1.05,
                                   double finalThreshold = 2.0, bool useMeanshiftGrouping = false) const;
 
+    /** @brief  Computes gradients and quantized gradient orientations.
+    @param img Matrix containing the image to be processed
+    @param grad Matrix of type CV_32FC2 containing computed gradients
+    @param angleOfs Matrix of type CV_8UC2 containing quantized gradient orientations
+    @param paddingTL Padding from top-left
+    @param paddingBR Padding from bottom-right
+    */
     CV_WRAP virtual void computeGradient(const Mat& img, CV_OUT Mat& grad, CV_OUT Mat& angleOfs,
                                  Size paddingTL = Size(), Size paddingBR = Size()) const;
 
+    /** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
+    */
     CV_WRAP static std::vector<float> getDefaultPeopleDetector();
+
+    /**@example hog.cpp
+    */
+    /** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
+    */
     CV_WRAP static std::vector<float> getDaimlerPeopleDetector();
 
+    //! Detection window size. Align to block size and block stride. Default value is Size(64,128).
     CV_PROP Size winSize;
+
+    //! Block size in pixels. Align to cell size. Default value is Size(16,16).
     CV_PROP Size blockSize;
+
+    //! Block stride. It must be a multiple of cell size. Default value is Size(8,8).
     CV_PROP Size blockStride;
+
+    //! Cell size. Default value is Size(8,8).
     CV_PROP Size cellSize;
+
+    //! Number of bins used in the calculation of histogram of gradients. Default value is 9.
     CV_PROP int nbins;
+
+    //! not documented
     CV_PROP int derivAperture;
+
+    //! Gaussian smoothing window parameter.
     CV_PROP double winSigma;
+
+    //! Histogram normalization type.
     CV_PROP int histogramNormType;
+
+    //! L2-Hys normalization method shrinkage.
     CV_PROP double L2HysThreshold;
+
+    //! Flag to specify whether the gamma correction preprocessing is required or not.
     CV_PROP bool gammaCorrection;
+
+    //! coefficients for the linear SVM classifier.
     CV_PROP std::vector<float> svmDetector;
+
+    //! coefficients for the linear SVM classifier used when OpenCL is enabled
     UMat oclSvmDetector;
+
+    //! not documented
     float free_coef;
+
+    //! Maximum number of detection window increases. Default value is 64
     CV_PROP int nlevels;
-    CV_PROP bool signedGradient;
 
+    //! Indicates whether the signed gradient will be used or not
+    CV_PROP bool signedGradient;
 
-    //! evaluate specified ROI and return confidence value for each location
+    /** @brief evaluate specified ROI and return confidence value for each location
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param locations Vector of Point
+    @param foundLocations Vector of Point where each Point is detected object's top-left point.
+    @param confidences Vector of confidence values for each detected location
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane. Usually
+    it is 0 and should be specified in the detector coefficients (as the last free coefficient). But if
+    the free coefficient is omitted (which is allowed), you can specify it manually here
+    @param winStride winStride
+    @param padding padding
+    */
     virtual void detectROI(const cv::Mat& img, const std::vector<cv::Point> &locations,
                                    CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
                                    double hitThreshold = 0, cv::Size winStride = Size(),
                                    cv::Size padding = Size()) const;
 
-    //! evaluate specified ROI and return confidence value for each location in multiple scales
+    /** @brief evaluate specified ROI and return confidence value for each location in multiple scales
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of rectangles where each rectangle contains the detected object.
+    @param locations Vector of DetectionROI
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane. Usually it is 0 and should be specified
+    in the detector coefficients (as the last free coefficient). But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param groupThreshold Minimum possible number of rectangles minus 1. The threshold is used in a group of rectangles to retain it.
+    */
     virtual void detectMultiScaleROI(const cv::Mat& img,
-                                                       CV_OUT std::vector<cv::Rect>& foundLocations,
-                                                       std::vector<DetectionROI>& locations,
-                                                       double hitThreshold = 0,
-                                                       int groupThreshold = 0) const;
+                                     CV_OUT std::vector<cv::Rect>& foundLocations,
+                                     std::vector<DetectionROI>& locations,
+                                     double hitThreshold = 0,
+                                     int groupThreshold = 0) const;
 
-    //! read/parse Dalal's alt model file
+    /** @brief read/parse Dalal's alt model file
+    @param modelfile Path of Dalal's alt model file.
+    */
     void readALTModel(String modelfile);
+
+    /** @brief Groups the object candidate rectangles.
+    @param rectList  Input/output vector of rectangles. Output vector includes retained and grouped rectangles. (The Python list is not modified in place.)
+    @param weights Input/output vector of weights of rectangles. Output vector includes weights of retained and grouped rectangles. (The Python list is not modified in place.)
+    @param groupThreshold Minimum possible number of rectangles minus 1. The threshold is used in a group of rectangles to retain it.
+    @param eps Relative difference between sides of the rectangles to merge them into a group.
+    */
     void groupRectangles(std::vector<cv::Rect>& rectList, std::vector<double>& weights, int groupThreshold, double eps) const;
 };
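Taken together, the new docs describe the usual people-detection flow. A minimal C++ sketch (not from this patch) of how the documented members combine; the input image name is hypothetical:

```
// Minimal sketch of the documented HOGDescriptor people-detection flow.
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/objdetect.hpp>
#include <vector>

int main() {
    cv::Mat img = cv::imread("people.jpg"); // hypothetical input image
    cv::HOGDescriptor hog; // defaults: winSize 64x128, 9 bins -> getDescriptorSize() == 3780
    hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
    std::vector<cv::Rect> found;
    std::vector<double> weights;
    // winStride must be a multiple of blockStride (8x8 by default)
    hog.detectMultiScale(img, found, weights, 0 /*hitThreshold*/,
                         cv::Size(8, 8) /*winStride*/, cv::Size() /*padding*/,
                         1.05 /*scale*/, 2.0 /*finalThreshold*/, false /*useMeanshiftGrouping*/);
    return (int)found.size(); // number of detected people
}
```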
 
index 0b7868a..9b21f19 100644 (file)
@@ -64,7 +64,7 @@ void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps
 
     if( groupThreshold <= 0 || rectList.empty() )
     {
-        if( weights )
+        if( weights && !levelWeights )
         {
             size_t i, sz = rectList.size();
             weights->resize(sz);
index 61afe6a..1ff7779 100755 (executable)
@@ -105,6 +105,14 @@ class CppHeaderParser(object):
             modlist.append("/CA " + macro_arg)
             arg_str = arg_str[:npos] + arg_str[npos3+1:]
 
+        npos = arg_str.find("const")
+        if npos >= 0:
+            modlist.append("/C")
+
+        npos = arg_str.find("&")
+        if npos >= 0:
+            modlist.append("/Ref")
+
         arg_str = arg_str.strip()
         word_start = 0
         word_list = []
@@ -406,13 +414,27 @@ class CppHeaderParser(object):
             func_modlist.append("="+arg)
             decl_str = decl_str[:npos] + decl_str[npos3+1:]
 
+        virtual_method = False
+        pure_virtual_method = False
+        const_method = False
+
         # filter off some common prefixes, which are meaningless for Python wrappers.
         # note that we do not strip "static" prefix, which does matter;
         # it means class methods, not instance methods
-        decl_str = self.batch_replace(decl_str, [("virtual", ""), ("static inline", ""), ("inline", ""),\
+        decl_str = self.batch_replace(decl_str, [("static inline", ""), ("inline", ""),\
             ("CV_EXPORTS_W", ""), ("CV_EXPORTS", ""), ("CV_CDECL", ""), ("CV_WRAP ", " "), ("CV_INLINE", ""),
             ("CV_DEPRECATED", "")]).strip()
 
+
+        if decl_str.strip().startswith('virtual'):
+            virtual_method = True
+
+        decl_str = decl_str.replace('virtual', '')
+
+        end_tokens = decl_str[decl_str.rfind(')'):].split()
+        const_method = 'const' in end_tokens
+        pure_virtual_method = '=' in end_tokens and '0' in end_tokens
+
         static_method = False
         context = top[0]
         if decl_str.startswith("static") and (context == "class" or context == "struct"):
@@ -575,6 +597,12 @@ class CppHeaderParser(object):
 
         if static_method:
             func_modlist.append("/S")
+        if const_method:
+            func_modlist.append("/C")
+        if virtual_method:
+            func_modlist.append("/V")
+        if pure_virtual_method:
+            func_modlist.append("/PV")
 
         return [funcname, rettype, func_modlist, args, original_type, docstring]
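For illustration only (hypothetical declarations, not from this patch), here is roughly how the qualifiers the parser now recognizes map onto wrapper flags:

```
// Hypothetical C++ declarations and the flags hdr_parser should attach:
virtual void read(FileNode& fn);     // method gets "/V"; the argument gets "/Ref"
virtual void write() const = 0;      // method gets "/V", "/C" (const) and "/PV" (pure virtual)
static int count(const Mat& m);      // method gets "/S"; the argument gets "/C" and "/Ref"
```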
 
index b9902ca..6991b47 100644 (file)
@@ -86,7 +86,7 @@ PlanarTarget = namedtuple('PlaneTarget', 'image, rect, keypoints, descrs, data')
   p0     - matched points coords in target image
   p1     - matched points coords in input frame
   H      - homography matrix from p0 to p1
-  quad   - target bounary quad in input frame
+  quad   - target boundary quad in input frame
 '''
 TrackedTarget = namedtuple('TrackedTarget', 'target, p0, p1, H, quad')
 
index 4e5b03d..fa0b0b8 100644 (file)
@@ -166,12 +166,12 @@ PERF_TEST_P( matchVector, bestOf2NearestVectorFeatures, testing::Combine(
         if (pairwise_matches[i].src_img_idx < 0)
             continue;
 
-        EXPECT_TRUE(pairwise_matches[i].matches.size() > 100);
+        EXPECT_GT(pairwise_matches[i].matches.size(), 95u);
         EXPECT_FALSE(pairwise_matches[i].H.empty());
         ++matches_count;
     }
 
-    EXPECT_TRUE(matches_count > 0);
+    EXPECT_GT(matches_count, 0u);
 
     SANITY_CHECK_NOTHING();
 }
index 6f2562b..bb3914c 100644 (file)
@@ -63,7 +63,7 @@ void ExposureCompensator::feed(const std::vector<Point> &corners, const std::vec
 {
     std::vector<std::pair<UMat,uchar> > level_masks;
     for (size_t i = 0; i < masks.size(); ++i)
-        level_masks.push_back(std::make_pair(masks[i], 255));
+        level_masks.push_back(std::make_pair(masks[i], (uchar)255));
     feed(corners, images, level_masks);
 }
 
index 46a555e..d630a5a 100644 (file)
@@ -58,8 +58,8 @@
 #  define GTEST_USES_POSIX_RE 0
 #endif
 
-#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
-#define GET_PARAM(k) std::tr1::get< k >(GetParam())
+#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< testing::tuple< __VA_ARGS__ > >
+#define GET_PARAM(k) testing::get< k >(GetParam())
 
 namespace cvtest
 {
@@ -70,6 +70,13 @@ using namespace cv;
 using testing::Values;
 using testing::Combine;
 
+// Tuple stuff from Google Tests
+using testing::get;
+using testing::make_tuple;
+using testing::tuple;
+using testing::tuple_size;
+using testing::tuple_element;
+
 
 class SkipTestException: public cv::Exception
 {
index 3c162ad..c0c2b7d 100644 (file)
@@ -62,9 +62,9 @@ namespace perf
     #define CUDA_CHANNELS_1_3_4 testing::Values(MatCn(Gray), MatCn(BGR), MatCn(BGRA))
     #define CUDA_CHANNELS_1_3 testing::Values(MatCn(Gray), MatCn(BGR))
 
-    #define GET_PARAM(k) std::tr1::get< k >(GetParam())
+    #define GET_PARAM(k) testing::get< k >(GetParam())
 
-    #define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
+    #define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< testing::tuple< __VA_ARGS__ > > name
     #define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name
 
     DEF_PARAM_TEST_1(Sz, cv::Size);
index 4d57aaa..ce25b71 100644 (file)
@@ -52,9 +52,6 @@ namespace ocl {
 
 using namespace perf;
 
-using std::tr1::get;
-using std::tr1::tuple;
-
 #define OCL_PERF_STRATEGY PERF_STRATEGY_SIMPLE
 
 #define OCL_PERF_TEST(fixture, name) SIMPLE_PERF_TEST(fixture, name)
index 54b33ec..294fab4 100644 (file)
@@ -325,7 +325,7 @@ struct CV_EXPORTS TSTestWithParam : public TestUtils, public ::testing::TestWith
 };
 
 #undef PARAM_TEST_CASE
-#define PARAM_TEST_CASE(name, ...) struct name : public ::cvtest::ocl::TSTestWithParam< std::tr1::tuple< __VA_ARGS__ > >
+#define PARAM_TEST_CASE(name, ...) struct name : public ::cvtest::ocl::TSTestWithParam< testing::tuple< __VA_ARGS__ > >
 
 #ifndef IMPLEMENT_PARAM_CLASS
 #define IMPLEMENT_PARAM_CLASS(name, type) \
index 0bdd346..259f869 100644 (file)
@@ -12,17 +12,17 @@ namespace cvtest {
 void checkIppStatus();
 }
 
-#define CV_TEST_INIT \
+#define CV__TEST_INIT \
     cv::ipp::setIppStatus(0); \
     cv::theRNG().state = cvtest::param_seed;
-#define CV_TEST_CLEANUP ::cvtest::checkIppStatus();
-#define CV_TEST_BODY_IMPL(name) \
+#define CV__TEST_CLEANUP ::cvtest::checkIppStatus();
+#define CV__TEST_BODY_IMPL(name) \
     { \
        CV__TRACE_APP_FUNCTION_NAME(name); \
        try { \
-          CV_TEST_INIT \
+          CV__TEST_INIT \
           Body(); \
-          CV_TEST_CLEANUP \
+          CV__TEST_CLEANUP \
        } \
        catch (cvtest::SkipTestException& e) \
        { \
@@ -54,7 +54,7 @@ void checkIppStatus();
             ::testing::Test::TearDownTestCase, \
             new ::testing::internal::TestFactoryImpl<\
                 GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
-    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL( #test_case_name "_" #test_name ) \
+    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV__TEST_BODY_IMPL( #test_case_name "_" #test_name ) \
     void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::Body()
 
 #undef TEST_F
@@ -80,17 +80,17 @@ void checkIppStatus();
             test_fixture::TearDownTestCase, \
             new ::testing::internal::TestFactoryImpl<\
                 GTEST_TEST_CLASS_NAME_(test_fixture, test_name)>);\
-    void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV_TEST_BODY_IMPL( #test_fixture "_" #test_name ) \
+    void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV__TEST_BODY_IMPL( #test_fixture "_" #test_name ) \
     void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::Body()
 
-#undef TEST_P
-#define TEST_P(test_case_name, test_name) \
+// Don't use directly
+#define CV__TEST_P(test_case_name, test_name, bodyMethodName, BODY_IMPL/*(name_str)*/) \
   class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
       : public test_case_name { \
    public: \
     GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
    private: \
-    virtual void Body(); \
+    virtual void bodyMethodName(); \
     virtual void TestBody(); \
     static int AddToRegistry() { \
       ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
@@ -112,7 +112,10 @@ void checkIppStatus();
   int GTEST_TEST_CLASS_NAME_(test_case_name, \
                              test_name)::gtest_registering_dummy_ = \
       GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
-    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL( #test_case_name "_" #test_name ) \
-    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::Body()
+    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() BODY_IMPL( #test_case_name "_" #test_name ) \
+    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::bodyMethodName()
+
+#undef TEST_P
+#define TEST_P(test_case_name, test_name) CV__TEST_P(test_case_name, test_name, Body, CV__TEST_BODY_IMPL)
 
 #endif  // OPENCV_TS_EXT_HPP
index 37e1586..3781365 100644 (file)
 
 // declare major namespaces to avoid errors on unknown namespace
 namespace cv { namespace cuda {} namespace ocl {} }
+namespace cvtest { }
 
 namespace perf
 {
+
+// Tuple stuff from Google Tests
+using testing::get;
+using testing::make_tuple;
+using testing::tuple;
+using testing::tuple_size;
+using testing::tuple_element;
+
 class TestBase;
 
 /*****************************************************************************************\
 *                Predefined typical frame sizes and typical test parameters               *
 \*****************************************************************************************/
-const cv::Size szQVGA = cv::Size(320, 240);
-const cv::Size szVGA = cv::Size(640, 480);
-const cv::Size szSVGA = cv::Size(800, 600);
-const cv::Size szXGA = cv::Size(1024, 768);
-const cv::Size szSXGA = cv::Size(1280, 1024);
-const cv::Size szWQHD = cv::Size(2560, 1440);
-
-const cv::Size sznHD = cv::Size(640, 360);
-const cv::Size szqHD = cv::Size(960, 540);
-const cv::Size sz240p = szQVGA;
-const cv::Size sz720p = cv::Size(1280, 720);
-const cv::Size sz1080p = cv::Size(1920, 1080);
-const cv::Size sz1440p = szWQHD;
-const cv::Size sz2160p = cv::Size(3840, 2160);//UHDTV1 4K
-const cv::Size sz4320p = cv::Size(7680, 4320);//UHDTV2 8K
-
-const cv::Size sz3MP = cv::Size(2048, 1536);
-const cv::Size sz5MP = cv::Size(2592, 1944);
-const cv::Size sz2K = cv::Size(2048, 2048);
-
-const cv::Size szODD = cv::Size(127, 61);
-
-const cv::Size szSmall24 = cv::Size(24, 24);
-const cv::Size szSmall32 = cv::Size(32, 32);
-const cv::Size szSmall64 = cv::Size(64, 64);
-const cv::Size szSmall128 = cv::Size(128, 128);
+const static cv::Size szQVGA = cv::Size(320, 240);
+const static cv::Size szVGA = cv::Size(640, 480);
+const static cv::Size szSVGA = cv::Size(800, 600);
+const static cv::Size szXGA = cv::Size(1024, 768);
+const static cv::Size szSXGA = cv::Size(1280, 1024);
+const static cv::Size szWQHD = cv::Size(2560, 1440);
+
+const static cv::Size sznHD = cv::Size(640, 360);
+const static cv::Size szqHD = cv::Size(960, 540);
+const static cv::Size sz240p = szQVGA;
+const static cv::Size sz720p = cv::Size(1280, 720);
+const static cv::Size sz1080p = cv::Size(1920, 1080);
+const static cv::Size sz1440p = szWQHD;
+const static cv::Size sz2160p = cv::Size(3840, 2160);//UHDTV1 4K
+const static cv::Size sz4320p = cv::Size(7680, 4320);//UHDTV2 8K
+
+const static cv::Size sz3MP = cv::Size(2048, 1536);
+const static cv::Size sz5MP = cv::Size(2592, 1944);
+const static cv::Size sz2K = cv::Size(2048, 2048);
+
+const static cv::Size szODD = cv::Size(127, 61);
+
+const static cv::Size szSmall24 = cv::Size(24, 24);
+const static cv::Size szSmall32 = cv::Size(32, 32);
+const static cv::Size szSmall64 = cv::Size(64, 64);
+const static cv::Size szSmall128 = cv::Size(128, 128);
 
 #define SZ_ALL_VGA ::testing::Values(::perf::szQVGA, ::perf::szVGA, ::perf::szSVGA)
 #define SZ_ALL_GA  ::testing::Values(::perf::szQVGA, ::perf::szVGA, ::perf::szSVGA, ::perf::szXGA, ::perf::szSXGA)
@@ -492,7 +501,7 @@ public:
 
 template<typename T> class TestBaseWithParam: public TestBase, public ::testing::WithParamInterface<T> {};
 
-typedef std::tr1::tuple<cv::Size, MatType> Size_MatType_t;
+typedef tuple<cv::Size, MatType> Size_MatType_t;
 typedef TestBaseWithParam<Size_MatType_t> Size_MatType;
 
 /*****************************************************************************************\
@@ -514,6 +523,13 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
 /*****************************************************************************************\
 *                        Macro definitions for performance tests                          *
 \*****************************************************************************************/
+
+#define CV__PERF_TEST_BODY_IMPL(name) \
+    { \
+       CV__TRACE_APP_FUNCTION_NAME("PERF_TEST: " name); \
+       RunPerfTestBody(); \
+    }
+
 #define PERF_PROXY_NAMESPACE_NAME_(test_case_name, test_name) \
   test_case_name##_##test_name##_perf_namespace_proxy
 
@@ -538,7 +554,7 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
       protected:\
        virtual void PerfTestBody();\
      };\
-     TEST_F(test_case_name, test_name){ CV_TRACE_REGION("PERF_TEST: " #test_case_name "_" #test_name); RunPerfTestBody(); }\
+     TEST_F(test_case_name, test_name){ CV__PERF_TEST_BODY_IMPL(#test_case_name "_" #test_name); }\
     }\
     void PERF_PROXY_NAMESPACE_NAME_(test_case_name, test_name)::test_case_name::PerfTestBody()
 
@@ -576,12 +592,20 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
       protected:\
        virtual void PerfTestBody();\
      };\
-     TEST_F(fixture, testname){ CV_TRACE_REGION("PERF_TEST: " #fixture "_" #testname); RunPerfTestBody(); }\
+     TEST_F(fixture, testname){ CV__PERF_TEST_BODY_IMPL(#fixture "_" #testname); }\
     }\
     void PERF_PROXY_NAMESPACE_NAME_(fixture, testname)::fixture::PerfTestBody()
 
 // Defines a parametrized performance test.
 //
+// @Note PERF_TEST_P() below violates the behavior of the original Google Test framework - the original TEST_P() performs no test instantiation, while PERF_TEST_P() instantiates tests itself
+// This PERF_TEST_P_() macro keeps the original behavior and is intended for usage with a separate INSTANTIATE_TEST_CASE_P macro
+#define PERF_TEST_P_(test_case_name, test_name) CV__TEST_P(test_case_name, test_name, PerfTestBody, CV__PERF_TEST_BODY_IMPL)
+
+// Defines a parametrized performance test.
+//
+// @Note The original TEST_P() macro doesn't instantiate tests with parameters. To keep that behavior, use the PERF_TEST_P_() macro above
+//
 // The first parameter is the name of the test fixture class, which
 // also doubles as the test case name.  The second parameter is the
 // name of the test within the test case.
@@ -609,7 +633,7 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
      protected:\
       virtual void PerfTestBody();\
     };\
-    TEST_P(fixture##_##name, name /*perf*/){ CV_TRACE_REGION("PERF_TEST: " #fixture "_" #name); RunPerfTestBody(); }\
+    CV__TEST_P(fixture##_##name, name, PerfTestBodyDummy, CV__PERF_TEST_BODY_IMPL){} \
     INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
     void fixture##_##name::PerfTestBody()
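As the notes above explain, PERF_TEST_P_() leaves instantiation to the caller. A minimal sketch of the intended pairing; the fixture, test name and parameter values are made up for illustration:

```
// Hypothetical usage: PERF_TEST_P_ defines the body, INSTANTIATE_TEST_CASE_P supplies parameters.
// Assumes the perf framework headers and opencv2/imgproc.hpp are available.
typedef perf::TestBaseWithParam<cv::Size> Size_Only; // made-up fixture

PERF_TEST_P_(Size_Only, resizeHalf)
{
    cv::Mat src(GetParam(), CV_8UC1), dst;
    declare.in(src, WARMUP_RNG);
    TEST_CYCLE() cv::resize(src, dst, cv::Size(), 0.5, 0.5);
    SANITY_CHECK_NOTHING();
}

INSTANTIATE_TEST_CASE_P(/*none*/, Size_Only, testing::Values(::perf::szVGA, ::perf::sz720p));
```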
 
index be9bcf5..0ba6235 100644 (file)
@@ -3091,6 +3091,9 @@ void printVersionInfo(bool useStdOut)
 #if CV_FP16
     if (checkHardwareSupport(CV_CPU_FP16)) cpu_features += " fp16";
 #endif
+#if CV_VSX
+    if (checkHardwareSupport(CV_CPU_VSX)) cpu_features += " VSX";
+#endif
 
     cpu_features.erase(0, 1); // erase initial space
 
index 15705cf..df5e49b 100644 (file)
@@ -1,2 +1,2 @@
 set(the_description "Video Analysis")
-ocv_define_module(video opencv_imgproc WRAP java python)
+ocv_define_module(video opencv_imgproc WRAP java python js)
index b48c52b..0ded292 100644 (file)
@@ -222,10 +222,7 @@ if(IOS)
        ${CMAKE_CURRENT_LIST_DIR}/src/cap_ios_photo_camera.mm
        ${CMAKE_CURRENT_LIST_DIR}/src/cap_ios_video_camera.mm)
 
-  list(APPEND VIDEOIO_LIBRARIES "-framework Accelerate" "-framework AVFoundation" "-framework CoreGraphics" "-framework CoreImage" "-framework CoreMedia" "-framework CoreVideo" "-framework QuartzCore" "-framework AssetsLibrary")
-  if(APPLE_FRAMEWORK)
-    list(APPEND VIDEOIO_LIBRARIES "-framework UIKit")
-  endif()
+  list(APPEND VIDEOIO_LIBRARIES "-framework Accelerate" "-framework AVFoundation" "-framework CoreGraphics" "-framework CoreImage" "-framework CoreMedia" "-framework CoreVideo" "-framework QuartzCore" "-framework UIKit")
 endif()
 
 if(WIN32)
index b3fc18c..0d674fe 100644 (file)
@@ -603,9 +603,9 @@ bool CvCaptureCAM_DC1394_v2_CPP::grabFrame()
         cvInitImageHeader(&fhdr, cvSize(fc->size[0], fc->size[1]), 8, nch);
         cvSetData(&fhdr, fc->image, fc->size[0]*nch);
 
-    // Swap R&B channels:
-    if (nch==3)
-        cvConvertImage(&fhdr,&fhdr,CV_CVTIMG_SWAP_RB);
+        // Swap R&B channels:
+        if (nch==3)
+            cvConvertImage(&fhdr,&fhdr,CV_CVTIMG_SWAP_RB);
 
         if( rectify && cameraId == VIDERE && nimages == 2 )
         {
index 8a19a47..94c1bab 100644 (file)
@@ -31,7 +31,7 @@
 
 #import "opencv2/videoio/cap_ios.h"
 #include "precomp.hpp"
-#import <AssetsLibrary/AssetsLibrary.h>
+#import <UIKit/UIKit.h>
 
 
 static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}
@@ -626,11 +626,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}
         return;
     }
 
-    ALAssetsLibrary *library = [[ALAssetsLibrary alloc] init];
-    if ([library videoAtPathIsCompatibleWithSavedPhotosAlbum:[self videoFileURL]]) {
-        [library writeVideoAtPathToSavedPhotosAlbum:[self videoFileURL]
-                                    completionBlock:^(NSURL *assetURL, NSError *error){ (void)assetURL; (void)error; }];
-    }
+    UISaveVideoAtPathToSavedPhotosAlbum([self videoFileString], nil, nil, NULL);
 }
 
 
diff --git a/platforms/js/README.md b/platforms/js/README.md
new file mode 100644 (file)
index 0000000..1db8a84
--- /dev/null
@@ -0,0 +1,15 @@
+Building OpenCV.js by Emscripten
+====================
+
+[Download and install Emscripten](https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html).
+
+Execute `build_js.py` script:
+```
+python <opencv_src_dir>/platforms/js/build_js.py <build_dir>
+```
+
+If everything is fine, a few minutes later you will get `<build_dir>/bin/opencv.js`, which you can include in your web pages.
+
+Find out about more build options with the `-h` switch (the script exposes flags such as `--build_wasm`, `--build_test` and `--build_doc`).
+
+For a detailed build tutorial, check out `<opencv_src_dir>/doc/js_tutorials/js_setup/js_setup/js_setup.markdown`.
diff --git a/platforms/js/build_js.py b/platforms/js/build_js.py
new file mode 100644 (file)
index 0000000..f58625d
--- /dev/null
@@ -0,0 +1,278 @@
+#!/usr/bin/env python
+
+import os, sys, subprocess, argparse, shutil, glob, re, multiprocessing
+import logging as log
+
+class Fail(Exception):
+    def __init__(self, text=None):
+        self.t = text
+    def __str__(self):
+        return "ERROR" if self.t is None else self.t
+
+def execute(cmd, shell=False):
+    try:
+        log.info("Executing: %s" % cmd)
+        retcode = subprocess.call(cmd, shell=shell)
+        if retcode < 0:
+            raise Fail("Child was terminated by signal:" %s -retcode)
+        elif retcode > 0:
+            raise Fail("Child returned: %s" % retcode)
+    except OSError as e:
+        raise Fail("Execution failed: %d / %s" % (e.errno, e.strerror))
+
+def rm_one(d):
+    d = os.path.abspath(d)
+    if os.path.exists(d):
+        if os.path.isdir(d):
+            log.info("Removing dir: %s", d)
+            shutil.rmtree(d)
+        elif os.path.isfile(d):
+            log.info("Removing file: %s", d)
+            os.remove(d)
+
+def check_dir(d, create=False, clean=False):
+    d = os.path.abspath(d)
+    log.info("Check dir %s (create: %s, clean: %s)", d, create, clean)
+    if os.path.exists(d):
+        if not os.path.isdir(d):
+            raise Fail("Not a directory: %s" % d)
+        if clean:
+            for x in glob.glob(os.path.join(d, "*")):
+                rm_one(x)
+    else:
+        if create:
+            os.makedirs(d)
+    return d
+
+def check_file(d):
+    d = os.path.abspath(d)
+    if os.path.exists(d):
+        if os.path.isfile(d):
+            return True
+        else:
+            return False
+    return False
+
+def find_file(name, path):
+    for root, dirs, files in os.walk(path):
+        if name in files:
+            return os.path.join(root, name)
+
+def determine_emcc_version(emscripten_dir):
+    ret = subprocess.check_output([os.path.join(emscripten_dir, "emcc"), "--version"])
+    m = re.match(r'^emcc.*(\d+\.\d+\.\d+)', ret, flags=re.IGNORECASE)
+    return m.group(1)
+
+def determine_opencv_version(version_hpp_path):
+    # version in 2.4 - CV_VERSION_EPOCH.CV_VERSION_MAJOR.CV_VERSION_MINOR.CV_VERSION_REVISION
+    # version in master - CV_VERSION_MAJOR.CV_VERSION_MINOR.CV_VERSION_REVISION-CV_VERSION_STATUS
+    with open(version_hpp_path, "rt") as f:
+        data = f.read()
+        major = re.search(r'^#define\W+CV_VERSION_MAJOR\W+(\d+)$', data, re.MULTILINE).group(1)
+        minor = re.search(r'^#define\W+CV_VERSION_MINOR\W+(\d+)$', data, re.MULTILINE).group(1)
+        revision = re.search(r'^#define\W+CV_VERSION_REVISION\W+(\d+)$', data, re.MULTILINE).group(1)
+        version_status = re.search(r'^#define\W+CV_VERSION_STATUS\W+"([^"]*)"$', data, re.MULTILINE).group(1)
+        return "%(major)s.%(minor)s.%(revision)s%(version_status)s" % locals()
+
+class Builder:
+    def __init__(self, options):
+        self.options = options
+        self.build_dir = check_dir(options.build_dir, create=True)
+        self.opencv_dir = check_dir(options.opencv_dir)
+        self.emscripten_dir = check_dir(options.emscripten_dir)
+        self.opencv_version = determine_opencv_version(os.path.join(self.opencv_dir, "modules", "core", "include", "opencv2", "core", "version.hpp"))
+        self.emcc_version = determine_emcc_version(self.emscripten_dir)
+
+    def get_toolchain_file(self):
+        return os.path.join(self.emscripten_dir, "cmake", "Modules", "Platform", "Emscripten.cmake")
+
+    def clean_build_dir(self):
+        for d in ["CMakeCache.txt", "CMakeFiles/", "bin/", "libs/", "lib/", "modules"]:
+            rm_one(d)
+
+    def get_cmake_cmd(self):
+        cmd = ["cmake",
+               "-DCMAKE_BUILD_TYPE=Release",
+               "-DCMAKE_TOOLCHAIN_FILE='%s'" % self.get_toolchain_file(),
+               "-DCPU_BASELINE=''",
+               "-DCPU_DISPATCH=''",
+               "-DCV_TRACE=OFF",
+               "-DBUILD_SHARED_LIBS=OFF",
+               "-DWITH_1394=OFF",
+               "-DWITH_VTK=OFF",
+               "-DWITH_CUDA=OFF",
+               "-DWITH_CUFFT=OFF",
+               "-DWITH_CUBLAS=OFF",
+               "-DWITH_NVCUVID=OFF",
+               "-DWITH_EIGEN=OFF",
+               "-DWITH_FFMPEG=OFF",
+               "-DWITH_GSTREAMER=OFF",
+               "-DWITH_GTK=OFF",
+               "-DWITH_GTK_2_X=OFF",
+               "-DWITH_IPP=OFF",
+               "-DWITH_JASPER=OFF",
+               "-DWITH_JPEG=OFF",
+               "-DWITH_WEBP=OFF",
+               "-DWITH_OPENEXR=OFF",
+               "-DWITH_OPENGL=OFF",
+               "-DWITH_OPENVX=OFF",
+               "-DWITH_OPENNI=OFF",
+               "-DWITH_OPENNI2=OFF",
+               "-DWITH_PNG=OFF",
+               "-DWITH_TBB=OFF",
+               "-DWITH_PTHREADS_PF=OFF",
+               "-DWITH_TIFF=OFF",
+               "-DWITH_V4L=OFF",
+               "-DWITH_OPENCL=OFF",
+               "-DWITH_OPENCL_SVM=OFF",
+               "-DWITH_OPENCLAMDFFT=OFF",
+               "-DWITH_OPENCLAMDBLAS=OFF",
+               "-DWITH_MATLAB=OFF",
+               "-DWITH_GPHOTO2=OFF",
+               "-DWITH_LAPACK=OFF",
+               "-DWITH_ITT=OFF",
+               "-DBUILD_ZLIB=ON",
+               "-DBUILD_opencv_apps=OFF",
+               "-DBUILD_opencv_calib3d=OFF",
+               "-DBUILD_opencv_dnn=OFF",
+               "-DBUILD_opencv_features2d=OFF",
+               "-DBUILD_opencv_flann=OFF",
+               "-DBUILD_opencv_ml=OFF",
+               "-DBUILD_opencv_photo=OFF",
+               "-DBUILD_opencv_imgcodecs=OFF",
+               "-DBUILD_opencv_shape=OFF",
+               "-DBUILD_opencv_videoio=OFF",
+               "-DBUILD_opencv_videostab=OFF",
+               "-DBUILD_opencv_highgui=OFF",
+               "-DBUILD_opencv_superres=OFF",
+               "-DBUILD_opencv_stitching=OFF",
+               "-DBUILD_opencv_java=OFF",
+               "-DBUILD_opencv_js=ON",
+               "-DBUILD_opencv_python2=OFF",
+               "-DBUILD_opencv_python3=OFF",
+               "-DBUILD_EXAMPLES=OFF",
+               "-DBUILD_PACKAGE=OFF",
+               "-DBUILD_TESTS=OFF",
+               "-DBUILD_PERF_TESTS=OFF"]
+        if self.options.build_doc:
+            cmd.append("-DBUILD_DOCS=ON")
+        else:
+            cmd.append("-DBUILD_DOCS=OFF")
+
+        flags = self.get_build_flags()
+        if flags:
+            cmd += ["-DCMAKE_C_FLAGS='%s'" % flags,
+                    "-DCMAKE_CXX_FLAGS='%s'" % flags]
+        return cmd
+
+    def get_build_flags(self):
+        flags = ""
+        if self.options.build_wasm:
+            flags += "-s WASM=1 "
+        if self.options.enable_exception:
+            flags += "-s DISABLE_EXCEPTION_CATCHING=0 "
+        return flags
+
+    def config(self):
+        cmd = self.get_cmake_cmd()
+        cmd.append(self.opencv_dir)
+        execute(cmd)
+
+    def build_opencvjs(self):
+        execute(["make", "-j", str(multiprocessing.cpu_count()), "opencv.js"])
+
+    def build_test(self):
+        execute(["make", "-j", str(multiprocessing.cpu_count()), "opencv_js_test"])
+
+    def build_doc(self):
+        execute(["make", "-j", str(multiprocessing.cpu_count()), "doxygen"])
+
+
+#===================================================================================================
+
+if __name__ == "__main__":
+    opencv_dir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), "../.."))
+    emscripten_dir = None
+    if "EMSCRIPTEN" in os.environ:
+        emscripten_dir = os.environ["EMSCRIPTEN"]
+
+    parser = argparse.ArgumentParser(description='Build OpenCV.js by Emscripten')
+    parser.add_argument("build_dir", help="Building directory (and output)")
+    parser.add_argument('--opencv_dir', default=opencv_dir, help='OpenCV source directory (default is "../.." relative to script location)')
+    parser.add_argument('--emscripten_dir', default=emscripten_dir, help="Path to Emscripten to use for build")
+    parser.add_argument('--build_wasm', action="store_true", help="Build OpenCV.js in WebAssembly format")
+    parser.add_argument('--build_test', action="store_true", help="Build tests")
+    parser.add_argument('--build_doc', action="store_true", help="Build tutorials")
+    parser.add_argument('--clean_build_dir', action="store_true", help="Clean build dir")
+    parser.add_argument('--skip_config', action="store_true", help="Skip cmake config")
+    parser.add_argument('--config_only', action="store_true", help="Only do cmake config")
+    parser.add_argument('--enable_exception', action="store_true", help="Enable exception handling")
+    args = parser.parse_args()
+
+    log.basicConfig(format='%(message)s', level=log.DEBUG)
+    log.debug("Args: %s", args)
+
+    if args.emscripten_dir is None:
+        log.info("Cannot get Emscripten path, please specify it either by EMSCRIPTEN environment variable or --emscripten_dir option.")
+        sys.exit(-1)
+
+    builder = Builder(args)
+
+    log.info("Detected OpenCV version: %s", builder.opencv_version)
+    log.info("Detected emcc version: %s", builder.emcc_version)
+
+    os.chdir(builder.build_dir)
+
+    if args.clean_build_dir:
+        log.info("=====")
+        log.info("===== Clean build dir %s", builder.build_dir)
+        log.info("=====")
+        builder.clean_build_dir()
+
+    if not args.skip_config:
+        target = "asm.js"
+        if args.build_wasm:
+            target = "wasm"
+        log.info("=====")
+        log.info("===== Config OpenCV.js build for %s" % target)
+        log.info("=====")
+        builder.config()
+
+    if args.config_only:
+        sys.exit(0)
+
+    log.info("=====")
+    log.info("===== Building OpenCV.js in %s", "asm.js" if not args.build_wasm else "wasm")
+    log.info("=====")
+    builder.build_opencvjs()
+
+    if args.build_test:
+        log.info("=====")
+        log.info("===== Building OpenCV.js tests")
+        log.info("=====")
+        builder.build_test()
+
+    if args.build_doc:
+        log.info("=====")
+        log.info("===== Building OpenCV.js tutorials")
+        log.info("=====")
+        builder.build_doc()
+
+
+    log.info("=====")
+    log.info("===== Build finished")
+    log.info("=====")
+
+    opencvjs_path = os.path.join(builder.build_dir, "bin", "opencv.js")
+    if check_file(opencvjs_path):
+        log.info("OpenCV.js location: %s", opencvjs_path)
+
+    if args.build_test:
+        opencvjs_test_path = os.path.join(builder.build_dir, "bin", "tests.html")
+        if check_file(opencvjs_test_path):
+            log.info("OpenCV.js tests location: %s", opencvjs_test_path)
+
+    if args.build_doc:
+        opencvjs_tutorial_path = find_file("tutorial_js_root.html", os.path.join(builder.build_dir, "doc", "doxygen", "html"))
+    if opencvjs_tutorial_path and check_file(opencvjs_tutorial_path):
+            log.info("OpenCV.js tutorials location: %s", opencvjs_tutorial_path)
index 6852bb5..31440e2 100644 (file)
@@ -86,7 +86,7 @@ public class MainActivity extends AppCompatActivity implements CvCameraViewListe
         // Forward image through network.
         Mat blob = Dnn.blobFromImage(frame, IN_SCALE_FACTOR,
                 new Size(IN_WIDTH, IN_HEIGHT),
-                new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), true);
+                new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), false);
         net.setInput(blob);
         Mat detections = net.forward();
 
index 0cd6c4a..2948449 100644 (file)
@@ -1,19 +1,25 @@
 /********************************************************************************
-*
-*
-*  This program is demonstration for ellipse fitting. Program finds
-*  contours and approximate it by ellipses.
-*
-*  Trackbar specify threshold parametr.
-*
-*  White lines is contours. Red lines is fitting ellipses.
-*
-*
-*  Autor:  Denis Burenkov.
-*
-*
-*
-********************************************************************************/
+ *
+ *
+ *  This program is a demonstration of ellipse fitting. The program finds
+ *  contours and approximates them by ellipses using three methods:
+ *  1: OpenCV's original method, fitEllipse, which implements the Fitzgibbon 1995 method.
+ *  2: The Approximate Mean Square (AMS) method, fitEllipseAMS, proposed by Taubin 1991.
+ *  3: The Direct least square (Direct) method, fitEllipseDirect, proposed by Fitzgibbon 1999.
+ *
+ *  The trackbar specifies the threshold parameter.
+ *
+ *  White lines are the contours/input points and the true ellipse used to generate the data.
+ *  1: Blue lines are ellipses fitted with OpenCV's original method.
+ *  2: Green lines are ellipses fitted with the AMS method.
+ *  3: Red lines are ellipses fitted with the Direct method.
+ *
+ *
+ *  Original Author:  Denis Burenkov
+ *  AMS and Direct Methods Author:  Jasper Shemilt
+ *
+ *
+ ********************************************************************************/
 #include "opencv2/imgproc.hpp"
 #include "opencv2/imgcodecs.hpp"
 #include "opencv2/highgui.hpp"
 using namespace cv;
 using namespace std;
 
+class canvas{
+public:
+    bool setupQ;
+    cv::Point origin;
+    cv::Point corner;
+    int minDims,maxDims;
+    double scale;
+    int rows, cols;
+    cv::Mat img;
+
+    void init(int minD, int maxD){
+        // Initialise the canvas with minimum and maximum rows and column sizes.
+        minDims = minD; maxDims = maxD;
+        origin = cv::Point(0,0);
+        corner = cv::Point(0,0);
+        scale = 1.0;
+        rows = 0;
+        cols = 0;
+        setupQ = false;
+    }
+
+    void stretch(cv::Point2f min, cv::Point2f max){
+        // Stretch the canvas to include the points min and max.
+        if(setupQ){
+            if(corner.x < max.x){corner.x = (int)(max.x + 1.0);};
+            if(corner.y < max.y){corner.y = (int)(max.y + 1.0);};
+            if(origin.x > min.x){origin.x = (int) min.x;};
+            if(origin.y > min.y){origin.y = (int) min.y;};
+        } else {
+            origin = cv::Point((int)min.x, (int)min.y);
+            corner = cv::Point((int)(max.x + 1.0), (int)(max.y + 1.0));
+        }
+
+        int c = (int)(scale*((corner.x + 1.0) - origin.x));
+        if(c<minDims){
+            scale = scale * (double)minDims/(double)c;
+        } else {
+            if(c>maxDims){
+                scale = scale * (double)maxDims/(double)c;
+            }
+        }
+        int r = (int)(scale*((corner.y + 1.0) - origin.y));
+        if(r<minDims){
+            scale = scale * (double)minDims/(double)r;
+        } else {
+            if(r>maxDims){
+                scale = scale * (double)maxDims/(double)r;
+            }
+        }
+        cols = (int)(scale*((corner.x + 1.0) - origin.x));
+        rows = (int)(scale*((corner.y + 1.0) - origin.y));
+        setupQ = true;
+    }
+
+    void stretch(vector<Point2f> pts)
+    {   // Stretch the canvas so all the points pts are on the canvas.
+        cv::Point2f min = pts[0];
+        cv::Point2f max = pts[0];
+        for(size_t i=1; i < pts.size(); i++){
+            Point2f pnt = pts[i];
+            if(max.x < pnt.x){max.x = pnt.x;};
+            if(max.y < pnt.y){max.y = pnt.y;};
+            if(min.x > pnt.x){min.x = pnt.x;};
+            if(min.y > pnt.y){min.y = pnt.y;};
+        };
+        stretch(min, max);
+    }
+
+    void stretch(cv::RotatedRect box)
+    {   // Stretch the canvas so that the rectangle box is on the canvas.
+        cv::Point2f min = box.center;
+        cv::Point2f max = box.center;
+        cv::Point2f vtx[4];
+        box.points(vtx);
+        for( int i = 0; i < 4; i++ ){
+            cv::Point2f pnt = vtx[i];
+            if(max.x < pnt.x){max.x = pnt.x;};
+            if(max.y < pnt.y){max.y = pnt.y;};
+            if(min.x > pnt.x){min.x = pnt.x;};
+            if(min.y > pnt.y){min.y = pnt.y;};
+        }
+        stretch(min, max);
+    }
+
+    void drawEllipseWithBox(cv::RotatedRect box, cv::Scalar color, int lineThickness)
+    {
+        if(img.empty()){
+            stretch(box);
+            img = cv::Mat::zeros(rows,cols,CV_8UC3);
+        }
+
+        box.center = scale * cv::Point2f(box.center.x - origin.x, box.center.y - origin.y);
+        box.size.width  = (float)(scale * box.size.width);
+        box.size.height = (float)(scale * box.size.height);
+
+        ellipse(img, box, color, lineThickness, LINE_AA);
+
+        Point2f vtx[4];
+        box.points(vtx);
+        for( int j = 0; j < 4; j++ ){
+            line(img, vtx[j], vtx[(j+1)%4], color, lineThickness, LINE_AA);
+        }
+    }
+
+    void drawPoints(vector<Point2f> pts, cv::Scalar color)
+    {
+        if(img.empty()){
+            stretch(pts);
+            img = cv::Mat::zeros(rows,cols,CV_8UC3);
+        }
+        for(size_t i=0; i < pts.size(); i++){
+            Point2f pnt = scale * cv::Point2f(pts[i].x - origin.x, pts[i].y - origin.y);
+            img.at<cv::Vec3b>(int(pnt.y), int(pnt.x))[0] = (uchar)color[0];
+            img.at<cv::Vec3b>(int(pnt.y), int(pnt.x))[1] = (uchar)color[1];
+            img.at<cv::Vec3b>(int(pnt.y), int(pnt.x))[2] = (uchar)color[2];
+        };
+    }
+
+    void drawLabels( std::vector<std::string> text, std::vector<cv::Scalar> colors)
+    {
+        if(img.empty()){
+            img = cv::Mat::zeros(rows,cols,CV_8UC3);
+        }
+        int vPos = 0;
+        for (size_t i=0; i < text.size(); i++) {
+            cv::Scalar color = colors[i];
+            std::string txt = text[i];
+            Size textsize = getTextSize(txt, FONT_HERSHEY_COMPLEX, 1, 1, 0);
+            vPos += (int)(1.3 * textsize.height);
+            Point org((img.cols - textsize.width), vPos);
+            cv::putText(img, txt, org, FONT_HERSHEY_COMPLEX, 1, color, 1, LINE_8);
+        }
+    }
+
+};
+
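A minimal usage sketch of the canvas helper added above (not part of the patch; it assumes the canvas class definition is in scope, and the function name is mine):

    #include <vector>
    #include "opencv2/imgproc.hpp"

    void demoCanvas()
    {
        canvas paper;
        paper.init(400, 1200);                        // clamp output between 400 and 1200 px
        std::vector<cv::Point2f> pts = { {10.f, 10.f}, {200.f, 80.f}, {120.f, 300.f} };
        paper.stretch(pts);                           // grow and rescale the canvas to fit pts
        paper.drawPoints(pts, cv::Scalar(255, 255, 255));
        paper.drawLabels({"demo"}, {cv::Scalar(255, 0, 0)});
    }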
 static void help()
 {
     cout <<
-        "\nThis program is demonstration for ellipse fitting. The program finds\n"
-        "contours and approximate it by ellipses.\n"
-        "Call:\n"
-        "./fitellipse [image_name -- Default ../data/stuff.jpg]\n" << endl;
+    "\nThis program is demonstration for ellipse fitting. The program finds\n"
+    "contours and approximate it by ellipses. Three methods are used to find the \n"
+    "elliptical fits: fitEllipse, fitEllipseAMS and fitEllipseDirect.\n"
+    "Call:\n"
+    "./fitellipse [image_name -- Default ../data/stuff.jpg]\n" << endl;
 }
 
 int sliderPos = 70;
 
 Mat image;
 
+bool fitEllipseQ, fitEllipseAMSQ, fitEllipseDirectQ;
+cv::Scalar fitEllipseColor       = Scalar(255,  0,  0);
+cv::Scalar fitEllipseAMSColor    = Scalar(  0,255,  0);
+cv::Scalar fitEllipseDirectColor = Scalar(  0,  0,255);
+cv::Scalar fitEllipseTrueColor   = Scalar(255,255,255);
+
 void processImage(int, void*);
 
 int main( int argc, char** argv )
 {
-    cv::CommandLineParser parser(argc, argv,
-        "{help h||}{@image|../data/stuff.jpg|}"
-    );
+    fitEllipseQ       = true;
+    fitEllipseAMSQ    = true;
+    fitEllipseDirectQ = true;
+
+    cv::CommandLineParser parser(argc, argv,"{help h||}{@image|../data/ellipses.jpg|}");
     if (parser.has("help"))
     {
         help();
@@ -56,10 +207,11 @@ int main( int argc, char** argv )
     }
 
     imshow("source", image);
-    namedWindow("result", 1);
+    namedWindow("result", CV_WINDOW_NORMAL );
 
     // Create toolbars. HighGUI use.
     createTrackbar( "threshold", "result", &sliderPos, 255, processImage );
+
     processImage(0, 0);
 
     // Wait for a key stroke; the same function arranges events processing
@@ -71,13 +223,35 @@ int main( int argc, char** argv )
 // draw it and approximate it by ellipses.
 void processImage(int /*h*/, void*)
 {
+    RotatedRect box, boxAMS, boxDirect;
     vector<vector<Point> > contours;
     Mat bimage = image >= sliderPos;
 
     findContours(bimage, contours, RETR_LIST, CHAIN_APPROX_NONE);
 
-    Mat cimage = Mat::zeros(bimage.size(), CV_8UC3);
+    canvas paper;
+    paper.init(int(0.8*MIN(bimage.rows, bimage.cols)), int(1.2*MAX(bimage.rows, bimage.cols)));
+    paper.stretch(cv::Point2f(0.0f, 0.0f), cv::Point2f((float)(bimage.cols+2.0), (float)(bimage.rows+2.0)));
+
+    std::vector<std::string> text;
+    std::vector<cv::Scalar> color;
+
+    if (fitEllipseQ) {
+        text.push_back("OpenCV");
+        color.push_back(fitEllipseColor);
+    }
+    if (fitEllipseAMSQ) {
+        text.push_back("AMS");
+        color.push_back(fitEllipseAMSColor);
+    }
+    if (fitEllipseDirectQ) {
+        text.push_back("Direct");
+        color.push_back(fitEllipseDirectColor);
+    }
+    paper.drawLabels(text, color);
 
+    int margin = 2;
+    vector< vector<Point2f> > points;
     for(size_t i = 0; i < contours.size(); i++)
     {
         size_t count = contours[i].size();
@@ -86,19 +260,57 @@ void processImage(int /*h*/, void*)
 
         Mat pointsf;
         Mat(contours[i]).convertTo(pointsf, CV_32F);
-        RotatedRect box = fitEllipse(pointsf);
 
-        if( MAX(box.size.width, box.size.height) > MIN(box.size.width, box.size.height)*30 )
+        vector<Point2f> pts;
+        for (int j = 0; j < pointsf.rows; j++) {
+            Point2f pnt = Point2f(pointsf.at<float>(j,0), pointsf.at<float>(j,1));
+            if ((pnt.x > margin && pnt.y > margin && pnt.x < bimage.cols-margin && pnt.y < bimage.rows-margin)) {
+                if(j%20==0){
+                    pts.push_back(pnt);
+                }
+            }
+        }
+        points.push_back(pts);
+    }
+
+    for(size_t i = 0; i < points.size(); i++)
+    {
+        vector<Point2f> pts = points[i];
+
+        if (pts.size()<=5) {
             continue;
-        drawContours(cimage, contours, (int)i, Scalar::all(255), 1, 8);
+        }
+        if (fitEllipseQ) {
+            box = fitEllipse(pts);
+            if( MAX(box.size.width, box.size.height) > MIN(box.size.width, box.size.height)*30 ||
+               MAX(box.size.width, box.size.height) <= 0 ||
+               MIN(box.size.width, box.size.height) <= 0){continue;};
+        }
+        if (fitEllipseAMSQ) {
+            boxAMS = fitEllipseAMS(pts);
+            if( MAX(boxAMS.size.width, boxAMS.size.height) > MIN(boxAMS.size.width, boxAMS.size.height)*30 ||
+               MAX(boxAMS.size.width, boxAMS.size.height) <= 0 ||
+               MIN(boxAMS.size.width, boxAMS.size.height) <= 0){continue;};
+        }
+        if (fitEllipseDirectQ) {
+            boxDirect = fitEllipseDirect(pts);
+            if( MAX(boxDirect.size.width, boxDirect.size.height) > MIN(boxDirect.size.width, boxDirect.size.height)*30 ||
+               MAX(boxDirect.size.width, boxDirect.size.height) <= 0 ||
+               MIN(boxDirect.size.width, boxDirect.size.height) <= 0 ){continue;};
+        }
 
-        ellipse(cimage, box, Scalar(0,0,255), 1, LINE_AA);
-        ellipse(cimage, box.center, box.size*0.5f, box.angle, 0, 360, Scalar(0,255,255), 1, LINE_AA);
-        Point2f vtx[4];
-        box.points(vtx);
-        for( int j = 0; j < 4; j++ )
-            line(cimage, vtx[j], vtx[(j+1)%4], Scalar(0,255,0), 1, LINE_AA);
+        if (fitEllipseQ) {
+            paper.drawEllipseWithBox(box, fitEllipseColor, 3);
+        }
+        if (fitEllipseAMSQ) {
+            paper.drawEllipseWithBox(boxAMS, fitEllipseAMSColor, 2);
+        }
+        if (fitEllipseDirectQ) {
+            paper.drawEllipseWithBox(boxDirect, fitEllipseDirectColor, 1);
+        }
+
+        paper.drawPoints(pts, cv::Scalar(255,255,255));
     }
 
-    imshow("result", cimage);
+    imshow("result", paper.img);
 }
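A condensed sketch of the three fitting calls the updated sample compares (illustrative only; each function needs at least 5 points, and the function name is mine):

    #include <vector>
    #include "opencv2/imgproc.hpp"

    void fitAllThree(const std::vector<cv::Point2f>& pts)
    {
        cv::RotatedRect e1 = cv::fitEllipse(pts);        // Fitzgibbon 1995
        cv::RotatedRect e2 = cv::fitEllipseAMS(pts);     // Taubin 1991 (AMS)
        cv::RotatedRect e3 = cv::fitEllipseDirect(pts);  // Fitzgibbon 1999 (Direct)
        // e1..e3 can then be rendered with canvas::drawEllipseWithBox as above.
        (void)e1; (void)e2; (void)e3;
    }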
diff --git a/samples/cpp/houghcircles.cpp b/samples/cpp/houghcircles.cpp
deleted file mode 100644 (file)
index f749b0e..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/imgproc.hpp"
-
-#include <iostream>
-
-using namespace cv;
-using namespace std;
-
-static void help()
-{
-    cout << "\nThis program demonstrates circle finding with the Hough transform.\n"
-            "Usage:\n"
-            "./houghcircles <image_name>, Default is ../data/board.jpg\n" << endl;
-}
-
-int main(int argc, char** argv)
-{
-    cv::CommandLineParser parser(argc, argv,
-        "{help h ||}{@image|../data/board.jpg|}"
-    );
-    if (parser.has("help"))
-    {
-        help();
-        return 0;
-    }
-    //![load]
-    string filename = parser.get<string>("@image");
-    Mat img = imread(filename, IMREAD_COLOR);
-    if(img.empty())
-    {
-        help();
-        cout << "can not open " << filename << endl;
-        return -1;
-    }
-    //![load]
-
-    //![convert_to_gray]
-    Mat gray;
-    cvtColor(img, gray, COLOR_BGR2GRAY);
-    //![convert_to_gray]
-
-    //![reduce_noise]
-    medianBlur(gray, gray, 5);
-    //![reduce_noise]
-
-    //![houghcircles]
-    vector<Vec3f> circles;
-    HoughCircles(gray, circles, HOUGH_GRADIENT, 1,
-                 gray.rows/16, // change this value to detect circles with different distances to each other
-                 100, 30, 1, 30 // change the last two parameters
-                                // (min_radius & max_radius) to detect larger circles
-                 );
-    //![houghcircles]
-
-    //![draw]
-    for( size_t i = 0; i < circles.size(); i++ )
-    {
-        Vec3i c = circles[i];
-        circle( img, Point(c[0], c[1]), c[2], Scalar(0,0,255), 3, LINE_AA);
-        circle( img, Point(c[0], c[1]), 2, Scalar(0,255,0), 3, LINE_AA);
-    }
-    //![draw]
-
-    //![display]
-    imshow("detected circles", img);
-    waitKey();
-    //![display]
-
-    return 0;
-}
diff --git a/samples/cpp/houghlines.cpp b/samples/cpp/houghlines.cpp
deleted file mode 100644 (file)
index 94eec86..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/imgproc.hpp"
-
-#include <iostream>
-
-using namespace cv;
-using namespace std;
-
-static void help()
-{
-    cout << "\nThis program demonstrates line finding with the Hough transform.\n"
-            "Usage:\n"
-            "./houghlines <image_name>, Default is ../data/pic1.png\n" << endl;
-}
-
-int main(int argc, char** argv)
-{
-    cv::CommandLineParser parser(argc, argv,
-        "{help h||}{@image|../data/pic1.png|}"
-    );
-    if (parser.has("help"))
-    {
-        help();
-        return 0;
-    }
-    string filename = parser.get<string>("@image");
-    if (filename.empty())
-    {
-        help();
-        cout << "no image_name provided" << endl;
-        return -1;
-    }
-    Mat src = imread(filename, 0);
-    if(src.empty())
-    {
-        help();
-        cout << "can not open " << filename << endl;
-        return -1;
-    }
-
-    Mat dst, cdst;
-    Canny(src, dst, 50, 200, 3);
-    cvtColor(dst, cdst, COLOR_GRAY2BGR);
-
-#if 0
-    vector<Vec2f> lines;
-    HoughLines(dst, lines, 1, CV_PI/180, 100, 0, 0 );
-
-    for( size_t i = 0; i < lines.size(); i++ )
-    {
-        float rho = lines[i][0], theta = lines[i][1];
-        Point pt1, pt2;
-        double a = cos(theta), b = sin(theta);
-        double x0 = a*rho, y0 = b*rho;
-        pt1.x = cvRound(x0 + 1000*(-b));
-        pt1.y = cvRound(y0 + 1000*(a));
-        pt2.x = cvRound(x0 - 1000*(-b));
-        pt2.y = cvRound(y0 - 1000*(a));
-        line( cdst, pt1, pt2, Scalar(0,0,255), 3, CV_AA);
-    }
-#else
-    vector<Vec4i> lines;
-    HoughLinesP(dst, lines, 1, CV_PI/180, 50, 50, 10 );
-    for( size_t i = 0; i < lines.size(); i++ )
-    {
-        Vec4i l = lines[i];
-        line( cdst, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0,0,255), 3, LINE_AA);
-    }
-#endif
-    imshow("source", src);
-    imshow("detected lines", cdst);
-
-    waitKey();
-
-    return 0;
-}
@@ -23,15 +23,22 @@ int main(){
     Mat output_image;
     morphologyEx(input_image, output_image, MORPH_HITMISS, kernel);
 
-    const int rate = 10;
+    const int rate = 50;
     kernel = (kernel + 1) * 127;
     kernel.convertTo(kernel, CV_8U);
+
     resize(kernel, kernel, Size(), rate, rate, INTER_NEAREST);
     imshow("kernel", kernel);
+    moveWindow("kernel", 0, 0);
+
     resize(input_image, input_image, Size(), rate, rate, INTER_NEAREST);
     imshow("Original", input_image);
+    moveWindow("Original", 0, 200);
+
     resize(output_image, output_image, Size(), rate, rate, INTER_NEAREST);
     imshow("Hit or Miss", output_image);
+    moveWindow("Hit or Miss", 500, 200);
+
     waitKey(0);
     return 0;
 }
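The `(kernel + 1) * 127` step above turns the {-1, 0, 1} hit-or-miss kernel values into 0/127/254 grey levels so the kernel itself can be displayed. A standalone sketch of that remapping (function name is mine):

    #include "opencv2/imgproc.hpp"

    // k holds hit-or-miss values: -1 = must be background, 0 = don't care, 1 = must be foreground.
    cv::Mat kernelToImage(const cv::Mat& k)
    {
        cv::Mat vis = (k + 1) * 127;   // {-1, 0, 1} -> {0, 127, 254}
        vis.convertTo(vis, CV_8U);     // make it displayable with imshow
        return vis;
    }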
diff --git a/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp b/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp
deleted file mode 100644 (file)
index 62c2fe5..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * @file Pyramids.cpp
- * @brief Sample code of image pyramids (pyrDown and pyrUp)
- * @author OpenCV team
- */
-
-#include "opencv2/imgproc.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/highgui.hpp"
-
-using namespace cv;
-
-/// Global variables
-Mat src, dst, tmp;
-
-const char* window_name = "Pyramids Demo";
-
-
-/**
- * @function main
- */
-int main( void )
-{
-  /// General instructions
-  printf( "\n Zoom In-Out demo  \n " );
-  printf( "------------------ \n" );
-  printf( " * [u] -> Zoom in  \n" );
-  printf( " * [d] -> Zoom out \n" );
-  printf( " * [ESC] -> Close program \n \n" );
-
-  //![load]
-  src = imread( "../data/chicky_512.png" ); // Loads the test image
-  if( src.empty() )
-    { printf(" No data! -- Exiting the program \n");
-      return -1; }
-  //![load]
-
-  tmp = src;
-  dst = tmp;
-
-  //![create_window]
-  imshow( window_name, dst );
-  //![create_window]
-
-  //![infinite_loop]
-  for(;;)
-  {
-    char c = (char)waitKey(0);
-
-    if( c == 27 )
-      { break; }
-    //![pyrup]
-    if( c == 'u' )
-      { pyrUp( tmp, dst, Size( tmp.cols*2, tmp.rows*2 ) );
-        printf( "** Zoom In: Image x 2 \n" );
-      }
-    //![pyrup]
-    //![pyrdown]
-    else if( c == 'd' )
-      { pyrDown( tmp, dst, Size( tmp.cols/2, tmp.rows/2 ) );
-        printf( "** Zoom Out: Image / 2 \n" );
-      }
-    //![pyrdown]
-    imshow( window_name, dst );
-
-    //![update_tmp]
-    tmp = dst;
-    //![update_tmp]
-   }
-   //![infinite_loop]
-
-   return 0;
-}
diff --git a/samples/cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp b/samples/cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp
new file mode 100644 (file)
index 0000000..1cc7bf2
--- /dev/null
@@ -0,0 +1,69 @@
+/**
+ * @file Pyramids.cpp
+ * @brief Sample code of image pyramids (pyrDown and pyrUp)
+ * @author OpenCV team
+ */
+
+#include "iostream"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+
+const char* window_name = "Pyramids Demo";
+
+/**
+ * @function main
+ */
+int main( int argc, char** argv )
+{
+    /// General instructions
+    cout << "\n Zoom In-Out demo \n "
+            "------------------  \n"
+            " * [i] -> Zoom in   \n"
+            " * [o] -> Zoom out  \n"
+            " * [ESC] -> Close program \n" << endl;
+
+    //![load]
+    const char* filename = argc >=2 ? argv[1] : "../data/chicky_512.png";
+
+    // Loads an image
+    Mat src = imread( filename );
+
+    // Check if image is loaded fine
+    if(src.empty()){
+        printf(" Error opening image\n");
+        printf(" Program Arguments: [image_name -- default ../data/chicky_512.png] \n");
+        return -1;
+    }
+    //![load]
+
+    //![loop]
+    for(;;)
+    {
+        //![show_image]
+        imshow( window_name, src );
+        //![show_image]
+        char c = (char)waitKey(0);
+
+        if( c == 27 )
+        { break; }
+        //![pyrup]
+        else if( c == 'i' )
+        { pyrUp( src, src, Size( src.cols*2, src.rows*2 ) );
+            printf( "** Zoom In: Image x 2 \n" );
+        }
+        //![pyrup]
+        //![pyrdown]
+        else if( c == 'o' )
+        { pyrDown( src, src, Size( src.cols/2, src.rows/2 ) );
+            printf( "** Zoom Out: Image / 2 \n" );
+        }
+        //![pyrdown]
+    }
+    //![loop]
+
+    return 0;
+}
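A minimal sketch of the two pyramid steps the loop above cycles between (assumes src is a valid image; function name is mine):

    #include "opencv2/imgproc.hpp"

    void pyramidStep(cv::Mat& src, bool zoomIn)
    {
        if (zoomIn)
            cv::pyrUp(src, src, cv::Size(src.cols * 2, src.rows * 2));    // upsample, then Gaussian-smooth
        else
            cv::pyrDown(src, src, cv::Size(src.cols / 2, src.rows / 2));  // Gaussian-smooth, then downsample
    }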
diff --git a/samples/cpp/tutorial_code/ImgProc/Smoothing.cpp b/samples/cpp/tutorial_code/ImgProc/Smoothing.cpp
deleted file mode 100644 (file)
index 58aa474..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * file Smoothing.cpp
- * brief Sample code for simple filters
- * author OpenCV team
- */
-
-#include "opencv2/imgproc.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/highgui.hpp"
-
-using namespace std;
-using namespace cv;
-
-/// Global Variables
-int DELAY_CAPTION = 1500;
-int DELAY_BLUR = 100;
-int MAX_KERNEL_LENGTH = 31;
-
-Mat src; Mat dst;
-char window_name[] = "Smoothing Demo";
-
-/// Function headers
-int display_caption( const char* caption );
-int display_dst( int delay );
-
-
-/**
- * function main
- */
-int main( void )
-{
-  namedWindow( window_name, WINDOW_AUTOSIZE );
-
-  /// Load the source image
-  src = imread( "../data/lena.jpg", IMREAD_COLOR );
-
-  if( display_caption( "Original Image" ) != 0 ) { return 0; }
-
-  dst = src.clone();
-  if( display_dst( DELAY_CAPTION ) != 0 ) { return 0; }
-
-
-  /// Applying Homogeneous blur
-  if( display_caption( "Homogeneous Blur" ) != 0 ) { return 0; }
-
-  //![blur]
-  for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 )
-      { blur( src, dst, Size( i, i ), Point(-1,-1) );
-        if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } }
-  //![blur]
-
-  /// Applying Gaussian blur
-  if( display_caption( "Gaussian Blur" ) != 0 ) { return 0; }
-
-  //![gaussianblur]
-  for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 )
-      { GaussianBlur( src, dst, Size( i, i ), 0, 0 );
-        if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } }
-  //![gaussianblur]
-
-  /// Applying Median blur
-  if( display_caption( "Median Blur" ) != 0 ) { return 0; }
-
-  //![medianblur]
-  for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 )
-      { medianBlur ( src, dst, i );
-        if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } }
-  //![medianblur]
-
-  /// Applying Bilateral Filter
-  if( display_caption( "Bilateral Blur" ) != 0 ) { return 0; }
-
-  //![bilateralfilter]
-  for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 )
-      { bilateralFilter ( src, dst, i, i*2, i/2 );
-        if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } }
-  //![bilateralfilter]
-
-  /// Wait until user press a key
-  display_caption( "End: Press a key!" );
-
-  waitKey(0);
-
-  return 0;
-}
-
-/**
- * @function display_caption
- */
-int display_caption( const char* caption )
-{
-  dst = Mat::zeros( src.size(), src.type() );
-  putText( dst, caption,
-           Point( src.cols/4, src.rows/2),
-           FONT_HERSHEY_COMPLEX, 1, Scalar(255, 255, 255) );
-
-  imshow( window_name, dst );
-  int c = waitKey( DELAY_CAPTION );
-  if( c >= 0 ) { return -1; }
-  return 0;
-}
-
-/**
- * @function display_dst
- */
-int display_dst( int delay )
-{
-  imshow( window_name, dst );
-  int c = waitKey ( delay );
-  if( c >= 0 ) { return -1; }
-  return 0;
-}
diff --git a/samples/cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp b/samples/cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp
new file mode 100644 (file)
index 0000000..d96b52a
--- /dev/null
@@ -0,0 +1,115 @@
+/**
+ * file Smoothing.cpp
+ * brief Sample code for simple filters
+ * author OpenCV team
+ */
+
+#include <iostream>
+#include "opencv2/imgproc.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+
+/// Global Variables
+int DELAY_CAPTION = 1500;
+int DELAY_BLUR = 100;
+int MAX_KERNEL_LENGTH = 31;
+
+Mat src; Mat dst;
+char window_name[] = "Smoothing Demo";
+
+/// Function headers
+int display_caption( const char* caption );
+int display_dst( int delay );
+
+
+/**
+ * function main
+ */
+int main( int argc, char ** argv )
+{
+    namedWindow( window_name, WINDOW_AUTOSIZE );
+
+    /// Load the source image
+    const char* filename = argc >=2 ? argv[1] : "../data/lena.jpg";
+
+    src = imread( filename, IMREAD_COLOR );
+    if(src.empty()){
+        printf(" Error opening image\n");
+        printf(" Usage: ./Smoothing [image_name -- default ../data/lena.jpg] \n");
+        return -1;
+    }
+
+    if( display_caption( "Original Image" ) != 0 ) { return 0; }
+
+    dst = src.clone();
+    if( display_dst( DELAY_CAPTION ) != 0 ) { return 0; }
+
+
+    /// Applying Homogeneous blur
+    if( display_caption( "Homogeneous Blur" ) != 0 ) { return 0; }
+
+    //![blur]
+    for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 )
+    { blur( src, dst, Size( i, i ), Point(-1,-1) );
+        if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } }
+    //![blur]
+
+    /// Applying Gaussian blur
+    if( display_caption( "Gaussian Blur" ) != 0 ) { return 0; }
+
+    //![gaussianblur]
+    for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 )
+    { GaussianBlur( src, dst, Size( i, i ), 0, 0 );
+        if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } }
+    //![gaussianblur]
+
+    /// Applying Median blur
+    if( display_caption( "Median Blur" ) != 0 ) { return 0; }
+
+    //![medianblur]
+    for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 )
+    { medianBlur ( src, dst, i );
+        if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } }
+    //![medianblur]
+
+    /// Applying Bilateral Filter
+    if( display_caption( "Bilateral Blur" ) != 0 ) { return 0; }
+
+    //![bilateralfilter]
+    for ( int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2 )
+    { bilateralFilter ( src, dst, i, i*2, i/2 );
+        if( display_dst( DELAY_BLUR ) != 0 ) { return 0; } }
+    //![bilateralfilter]
+
+    /// Done
+    display_caption( "Done!" );
+
+    return 0;
+}
+
+/**
+ * @function display_caption
+ */
+int display_caption( const char* caption )
+{
+    dst = Mat::zeros( src.size(), src.type() );
+    putText( dst, caption,
+             Point( src.cols/4, src.rows/2),
+             FONT_HERSHEY_COMPLEX, 1, Scalar(255, 255, 255) );
+
+    return display_dst(DELAY_CAPTION);
+}
+
+/**
+ * @function display_dst
+ */
+int display_dst( int delay )
+{
+    imshow( window_name, dst );
+    int c = waitKey ( delay );
+    if( c >= 0 ) { return -1; }
+    return 0;
+}
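The four filters cycled above, side by side at one fixed kernel size (a sketch; k must be odd and greater than 1, and the function name is mine):

    #include "opencv2/imgproc.hpp"

    void smoothAll(const cv::Mat& src, int k)
    {
        cv::Mat box, gauss, median, bilateral;
        cv::blur(src, box, cv::Size(k, k));                       // normalized box filter
        cv::GaussianBlur(src, gauss, cv::Size(k, k), 0, 0);       // sigma derived from kernel size
        cv::medianBlur(src, median, k);                           // rank filter, robust to salt-and-pepper noise
        cv::bilateralFilter(src, bilateral, k, k * 2.0, k / 2.0); // edge-preserving smoothing
    }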
@@ -4,28 +4,32 @@
  * @author OpenCV team
  */
 
-#include <iostream>
 #include <opencv2/opencv.hpp>
 
+void show_wait_destroy(const char* winname, cv::Mat img);
+
 using namespace std;
 using namespace cv;
 
 int main(int, char** argv)
 {
-//! [load_image]
+    //! [load_image]
     // Load the image
     Mat src = imread(argv[1]);
 
     // Check if image is loaded fine
-    if(!src.data)
-        cerr << "Problem loading image!!!" << endl;
+    if(src.empty()){
+        printf(" Error opening image\n");
+        printf(" Program Arguments: [image_path]\n");
+        return -1;
+    }
 
     // Show source image
     imshow("src", src);
-//! [load_image]
+    //! [load_image]
 
-//! [gray]
-    // Transform source image to gray if it is not
+    //! [gray]
+    // Transform source image to gray if it is not already
     Mat gray;
 
     if (src.channels() == 3)
@@ -38,58 +42,58 @@ int main(int, char** argv)
     }
 
     // Show gray image
-    imshow("gray", gray);
-//! [gray]
+    show_wait_destroy("gray", gray);
+    //! [gray]
 
-//! [bin]
+    //! [bin]
     // Apply adaptiveThreshold to the bitwise-not of gray; note the ~ operator
     Mat bw;
     adaptiveThreshold(~gray, bw, 255, CV_ADAPTIVE_THRESH_MEAN_C, THRESH_BINARY, 15, -2);
 
     // Show binary image
-    imshow("binary", bw);
-//! [bin]
+    show_wait_destroy("binary", bw);
+    //! [bin]
 
-//! [init]
+    //! [init]
     // Create the images that will be used to extract the horizontal and vertical lines
     Mat horizontal = bw.clone();
     Mat vertical = bw.clone();
-//! [init]
+    //! [init]
 
-//! [horiz]
+    //! [horiz]
     // Specify size on horizontal axis
-    int horizontalsize = horizontal.cols / 30;
+    int horizontal_size = horizontal.cols / 30;
 
     // Create structure element for extracting horizontal lines through morphology operations
-    Mat horizontalStructure = getStructuringElement(MORPH_RECT, Size(horizontalsize,1));
+    Mat horizontalStructure = getStructuringElement(MORPH_RECT, Size(horizontal_size, 1));
 
     // Apply morphology operations
     erode(horizontal, horizontal, horizontalStructure, Point(-1, -1));
     dilate(horizontal, horizontal, horizontalStructure, Point(-1, -1));
 
     // Show extracted horizontal lines
-    imshow("horizontal", horizontal);
-//! [horiz]
+    show_wait_destroy("horizontal", horizontal);
+    //! [horiz]
 
-//! [vert]
+    //! [vert]
     // Specify size on vertical axis
-    int verticalsize = vertical.rows / 30;
+    int vertical_size = vertical.rows / 30;
 
     // Create structure element for extracting vertical lines through morphology operations
-    Mat verticalStructure = getStructuringElement(MORPH_RECT, Size( 1,verticalsize));
+    Mat verticalStructure = getStructuringElement(MORPH_RECT, Size(1, vertical_size));
 
     // Apply morphology operations
     erode(vertical, vertical, verticalStructure, Point(-1, -1));
     dilate(vertical, vertical, verticalStructure, Point(-1, -1));
 
     // Show extracted vertical lines
-    imshow("vertical", vertical);
-//! [vert]
+    show_wait_destroy("vertical", vertical);
+    //! [vert]
 
-//! [smooth]
+    //! [smooth]
     // Inverse vertical image
     bitwise_not(vertical, vertical);
-    imshow("vertical_bit", vertical);
+    show_wait_destroy("vertical_bit", vertical);
 
     // Extract edges and smooth image according to the logic
     // 1. extract edges
@@ -101,12 +105,12 @@ int main(int, char** argv)
     // Step 1
     Mat edges;
     adaptiveThreshold(vertical, edges, 255, CV_ADAPTIVE_THRESH_MEAN_C, THRESH_BINARY, 3, -2);
-    imshow("edges", edges);
+    show_wait_destroy("edges", edges);
 
     // Step 2
     Mat kernel = Mat::ones(2, 2, CV_8UC1);
     dilate(edges, edges, kernel);
-    imshow("dilate", edges);
+    show_wait_destroy("dilate", edges);
 
     // Step 3
     Mat smooth;
@@ -119,9 +123,15 @@ int main(int, char** argv)
     smooth.copyTo(vertical, edges);
 
     // Show final result
-    imshow("smooth", vertical);
-//! [smooth]
+    show_wait_destroy("smooth - final", vertical);
+    //! [smooth]
 
-    waitKey(0);
     return 0;
-}
\ No newline at end of file
+}
+
+void show_wait_destroy(const char* winname, cv::Mat img) {
+    imshow(winname, img);
+    moveWindow(winname, 500, 0);
+    waitKey(0);
+    destroyWindow(winname);
+}
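The extraction idea used above in a single call: an opening (erode then dilate) with a long, thin structuring element keeps only runs at least as long as the element. A sketch, not the tutorial's code (function name is mine):

    #include "opencv2/imgproc.hpp"

    cv::Mat extractRuns(const cv::Mat& bw, bool horizontal)
    {
        int len = horizontal ? bw.cols / 30 : bw.rows / 30;
        cv::Mat se = cv::getStructuringElement(cv::MORPH_RECT,
                         horizontal ? cv::Size(len, 1) : cv::Size(1, len));
        cv::Mat out;
        cv::morphologyEx(bw, out, cv::MORPH_OPEN, se);   // equivalent to the erode + dilate pair above
        return out;
    }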
index 0f56c49..80b6e84 100644 (file)
@@ -15,50 +15,53 @@ using namespace cv;
  */
 int main( int argc, char** argv )
 {
-  //![variables]
-  Mat src, src_gray, dst;
-  int kernel_size = 3;
-  int scale = 1;
-  int delta = 0;
-  int ddepth = CV_16S;
-  const char* window_name = "Laplace Demo";
-  //![variables]
+    //![variables]
+    // Declare the variables we are going to use
+    Mat src, src_gray, dst;
+    int kernel_size = 3;
+    int scale = 1;
+    int delta = 0;
+    int ddepth = CV_16S;
+    const char* window_name = "Laplace Demo";
+    //![variables]
 
-  //![load]
-  String imageName("../data/lena.jpg"); // by default
-  if (argc > 1)
-  {
-    imageName = argv[1];
-  }
-  src = imread( imageName, IMREAD_COLOR ); // Load an image
+    //![load]
+    const char* imageName = argc >=2 ? argv[1] : "../data/lena.jpg";
 
-  if( src.empty() )
-    { return -1; }
-  //![load]
+    src = imread( imageName, IMREAD_COLOR ); // Load an image
 
-  //![reduce_noise]
-  /// Reduce noise by blurring with a Gaussian filter
-  GaussianBlur( src, src, Size(3,3), 0, 0, BORDER_DEFAULT );
-  //![reduce_noise]
+    // Check if image is loaded fine
+    if(src.empty()){
+        printf(" Error opening image\n");
+        printf(" Program Arguments: [image_name -- default ../data/lena.jpg] \n");
+        return -1;
+    }
+    //![load]
 
-  //![convert_to_gray]
-  cvtColor( src, src_gray, COLOR_BGR2GRAY ); // Convert the image to grayscale
-  //![convert_to_gray]
+    //![reduce_noise]
+    // Reduce noise by blurring with a Gaussian filter ( kernel size = 3 )
+    GaussianBlur( src, src, Size(3, 3), 0, 0, BORDER_DEFAULT );
+    //![reduce_noise]
 
-  /// Apply Laplace function
-  Mat abs_dst;
-  //![laplacian]
-  Laplacian( src_gray, dst, ddepth, kernel_size, scale, delta, BORDER_DEFAULT );
-  //![laplacian]
+    //![convert_to_gray]
+    cvtColor( src, src_gray, COLOR_BGR2GRAY ); // Convert the image to grayscale
+    //![convert_to_gray]
 
-  //![convert]
-  convertScaleAbs( dst, abs_dst );
-  //![convert]
+    /// Apply Laplace function
+    Mat abs_dst;
+    //![laplacian]
+    Laplacian( src_gray, dst, ddepth, kernel_size, scale, delta, BORDER_DEFAULT );
+    //![laplacian]
 
-  //![display]
-  imshow( window_name, abs_dst );
-  waitKey(0);
-  //![display]
+    //![convert]
+    // converting back to CV_8U
+    convertScaleAbs( dst, abs_dst );
+    //![convert]
 
-  return 0;
+    //![display]
+    imshow( window_name, abs_dst );
+    waitKey(0);
+    //![display]
+
+    return 0;
 }
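Why the sample keeps dst in CV_16S: the Laplacian has negative responses that CV_8U would clip. A compact sketch of the signed-to-displayable conversion (function name is mine):

    #include "opencv2/imgproc.hpp"

    cv::Mat laplacian8U(const cv::Mat& gray)
    {
        cv::Mat lap16s, lap8u;
        cv::Laplacian(gray, lap16s, CV_16S, 3);  // signed result preserves negative edge responses
        cv::convertScaleAbs(lap16s, lap8u);      // |value|, saturated to 8 bits for display
        return lap8u;
    }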
index 5209380..16661d8 100644 (file)
@@ -30,6 +30,7 @@ int main( int argc, char** argv )
   cout << "\nPress 'ESC' to exit program.\nPress 'R' to reset values ( ksize will be -1 equal to Scharr function )";
 
   //![variables]
+  // First we declare the variables we are going to use
   Mat image,src, src_gray;
   Mat grad;
   const String window_name = "Sobel Demo - Simple Edge Detector";
@@ -40,11 +41,14 @@ int main( int argc, char** argv )
   //![variables]
 
   //![load]
-  String imageName = parser.get<String>("@input"); // by default
+  String imageName = parser.get<String>("@input");
+  // As usual we load our source image (src)
   image = imread( imageName, IMREAD_COLOR ); // Load an image
 
+  // Check if image is loaded fine
   if( image.empty() )
   {
+    printf("Error opening image: %s\n", imageName.c_str());
     return 1;
   }
   //![load]
@@ -52,10 +56,12 @@ int main( int argc, char** argv )
   for (;;)
   {
     //![reduce_noise]
+    // Remove noise by blurring with a Gaussian filter ( kernel size = 3 )
     GaussianBlur(image, src, Size(3, 3), 0, 0, BORDER_DEFAULT);
     //![reduce_noise]
 
     //![convert_to_gray]
+    // Convert the image to grayscale
     cvtColor(src, src_gray, COLOR_BGR2GRAY);
     //![convert_to_gray]
 
@@ -72,6 +78,7 @@ int main( int argc, char** argv )
     //![sobel]
 
     //![convert]
+    // converting back to CV_8U
     convertScaleAbs(grad_x, abs_grad_x);
     convertScaleAbs(grad_y, abs_grad_y);
     //![convert]
index ca78078..52c7610 100644 (file)
 using namespace cv;
 
 //![variables]
+// Declare the variables
 Mat src, dst;
 int top, bottom, left, right;
-int borderType;
+int borderType = BORDER_CONSTANT;
 const char* window_name = "copyMakeBorder Demo";
 RNG rng(12345);
 //![variables]
@@ -23,65 +24,61 @@ RNG rng(12345);
  */
 int main( int argc, char** argv )
 {
-  //![load]
-  String imageName("../data/lena.jpg"); // by default
-  if (argc > 1)
-  {
-      imageName = argv[1];
-  }
-  src = imread( imageName, IMREAD_COLOR ); // Load an image
+    //![load]
+    const char* imageName = argc >=2 ? argv[1] : "../data/lena.jpg";
 
-  if( src.empty() )
-    {
-      printf(" No data entered, please enter the path to an image file \n");
-      return -1;
-    }
-  //![load]
+    // Loads an image
+    src = imread( imageName, IMREAD_COLOR ); // Load an image
 
-  /// Brief how-to for this program
-  printf( "\n \t copyMakeBorder Demo: \n" );
-  printf( "\t -------------------- \n" );
-  printf( " ** Press 'c' to set the border to a random constant value \n");
-  printf( " ** Press 'r' to set the border to be replicated \n");
-  printf( " ** Press 'ESC' to exit the program \n");
+    // Check if image is loaded fine
+    if( src.empty()) {
+        printf(" Error opening image\n");
+        printf(" Program Arguments: [image_name -- default ../data/lena.jpg] \n");
+        return -1;
+    }
+    //![load]
 
-  //![create_window]
-  namedWindow( window_name, WINDOW_AUTOSIZE );
-  //![create_window]
+    // Brief how-to for this program
+    printf( "\n \t copyMakeBorder Demo: \n" );
+    printf( "\t -------------------- \n" );
+    printf( " ** Press 'c' to set the border to a random constant value \n");
+    printf( " ** Press 'r' to set the border to be replicated \n");
+    printf( " ** Press 'ESC' to exit the program \n");
 
-  //![init_arguments]
-  /// Initialize arguments for the filter
-  top = (int) (0.05*src.rows); bottom = (int) (0.05*src.rows);
-  left = (int) (0.05*src.cols); right = (int) (0.05*src.cols);
-  //![init_arguments]
+    //![create_window]
+    namedWindow( window_name, WINDOW_AUTOSIZE );
+    //![create_window]
 
-  dst = src;
-  imshow( window_name, dst );
+    //![init_arguments]
+    // Initialize arguments for the filter
+    top = (int) (0.05*src.rows); bottom = top;
+    left = (int) (0.05*src.cols); right = left;
+    //![init_arguments]
 
-  for(;;)
-       {
-         //![check_keypress]
-         char c = (char)waitKey(500);
-         if( c == 27 )
-           { break; }
-         else if( c == 'c' )
-           { borderType = BORDER_CONSTANT; }
-         else if( c == 'r' )
-           { borderType = BORDER_REPLICATE; }
-         //![check_keypress]
+    for(;;)
+    {
+        //![update_value]
+        Scalar value( rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255) );
+        //![update_value]
 
-         //![update_value]
-         Scalar value( rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255) );
-         //![update_value]
+        //![copymakeborder]
+        copyMakeBorder( src, dst, top, bottom, left, right, borderType, value );
+        //![copymakeborder]
 
-         //![copymakeborder]
-         copyMakeBorder( src, dst, top, bottom, left, right, borderType, value );
-         //![copymakeborder]
+        //![display]
+        imshow( window_name, dst );
+        //![display]
 
-         //![display]
-         imshow( window_name, dst );
-         //![display]
-       }
+        //![check_keypress]
+        char c = (char)waitKey(500);
+        if( c == 27 )
+        { break; }
+        else if( c == 'c' )
+        { borderType = BORDER_CONSTANT; }
+        else if( c == 'r' )
+        { borderType = BORDER_REPLICATE; }
+        //![check_keypress]
+    }
 
-  return 0;
+    return 0;
 }
index 24aef46..3b7f3a9 100644 (file)
@@ -15,56 +15,60 @@ using namespace cv;
  */
 int main ( int argc, char** argv )
 {
-  /// Declare variables
-  Mat src, dst;
+    // Declare variables
+    Mat src, dst;
 
-  Mat kernel;
-  Point anchor;
-  double delta;
-  int ddepth;
-  int kernel_size;
-  const char* window_name = "filter2D Demo";
+    Mat kernel;
+    Point anchor;
+    double delta;
+    int ddepth;
+    int kernel_size;
+    const char* window_name = "filter2D Demo";
 
-  //![load]
-  String imageName("../data/lena.jpg"); // by default
-  if (argc > 1)
-  {
-    imageName = argv[1];
-  }
-  src = imread( imageName, IMREAD_COLOR ); // Load an image
+    //![load]
+    const char* imageName = argc >=2 ? argv[1] : "../data/lena.jpg";
 
-  if( src.empty() )
-    { return -1; }
-  //![load]
+    // Loads an image
+    src = imread( imageName, IMREAD_COLOR ); // Load an image
 
-  //![init_arguments]
-  /// Initialize arguments for the filter
-  anchor = Point( -1, -1 );
-  delta = 0;
-  ddepth = -1;
-  //![init_arguments]
+    if( src.empty() )
+    {
+        printf(" Error opening image\n");
+        printf(" Program Arguments: [image_name -- default ../data/lena.jpg] \n");
+        return -1;
+    }
+    //![load]
 
-  /// Loop - Will filter the image with different kernel sizes each 0.5 seconds
-  int ind = 0;
-  for(;;)
-       {
-         char c = (char)waitKey(500);
-         /// Press 'ESC' to exit the program
-         if( c == 27 )
-           { break; }
+    //![init_arguments]
+    // Initialize arguments for the filter
+    anchor = Point( -1, -1 );
+    delta = 0;
+    ddepth = -1;
+    //![init_arguments]
 
-         //![update_kernel]
-         /// Update kernel size for a normalized box filter
-         kernel_size = 3 + 2*( ind%5 );
-         kernel = Mat::ones( kernel_size, kernel_size, CV_32F )/ (float)(kernel_size*kernel_size);
-         //![update_kernel]
+    // Loop - Will filter the image with different kernel sizes each 0.5 seconds
+    int ind = 0;
+    for(;;)
+    {
+        //![update_kernel]
+        // Update kernel size for a normalized box filter
+        kernel_size = 3 + 2*( ind%5 );
+        kernel = Mat::ones( kernel_size, kernel_size, CV_32F )/ (float)(kernel_size*kernel_size);
+        //![update_kernel]
 
-         //![apply_filter]
-         filter2D(src, dst, ddepth , kernel, anchor, delta, BORDER_DEFAULT );
-         //![apply_filter]
-         imshow( window_name, dst );
-         ind++;
-       }
+        //![apply_filter]
+        // Apply filter
+        filter2D(src, dst, ddepth , kernel, anchor, delta, BORDER_DEFAULT );
+        //![apply_filter]
+        imshow( window_name, dst );
 
-  return 0;
+        char c = (char)waitKey(500);
+        // Press 'ESC' to exit the program
+        if( c == 27 )
+        { break; }
+
+        ind++;
+    }
+
+    return 0;
 }
diff --git a/samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp b/samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp
new file mode 100644 (file)
index 0000000..bd07220
--- /dev/null
@@ -0,0 +1,65 @@
+/**
+ * @file houghcircles.cpp
+ * @brief This program demonstrates circle finding with the Hough transform
+ */
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+
+using namespace cv;
+using namespace std;
+
+int main(int argc, char** argv)
+{
+    //![load]
+    const char* filename = argc >=2 ? argv[1] : "../../../data/smarties.png";
+
+    // Loads an image
+    Mat src = imread( filename, IMREAD_COLOR );
+
+    // Check if image is loaded fine
+    if(src.empty()){
+        printf(" Error opening image\n");
+        printf(" Program Arguments: [image_name -- default %s] \n", filename);
+        return -1;
+    }
+    //![load]
+
+    //![convert_to_gray]
+    Mat gray;
+    cvtColor(src, gray, COLOR_BGR2GRAY);
+    //![convert_to_gray]
+
+    //![reduce_noise]
+    medianBlur(gray, gray, 5);
+    //![reduce_noise]
+
+    //![houghcircles]
+    vector<Vec3f> circles;
+    HoughCircles(gray, circles, HOUGH_GRADIENT, 1,
+                 gray.rows/16,  // change this value to detect circles with different distances to each other
+                 100, 30, 1, 30 // change the last two parameters
+            // (min_radius & max_radius) to detect larger circles
+    );
+    //![houghcircles]
+
+    //![draw]
+    for( size_t i = 0; i < circles.size(); i++ )
+    {
+        Vec3i c = circles[i];
+        Point center = Point(c[0], c[1]);
+        // circle center
+        circle( src, center, 1, Scalar(0,100,100), 3, LINE_AA);
+        // circle outline
+        int radius = c[2];
+        circle( src, center, radius, Scalar(255,0,255), 3, LINE_AA);
+    }
+    //![draw]
+
+    //![display]
+    imshow("detected circles", src);
+    waitKey();
+    //![display]
+
+    return 0;
+}
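The HoughCircles parameters above, spelled out in a standalone sketch (the values mirror the sample's, not tuned ones; function name is mine):

    #include <vector>
    #include "opencv2/imgproc.hpp"

    std::vector<cv::Vec3f> findCircles(const cv::Mat& gray)
    {
        std::vector<cv::Vec3f> circles;
        cv::HoughCircles(gray, circles, cv::HOUGH_GRADIENT,
                         1,               // dp: accumulator at input resolution
                         gray.rows / 16,  // minDist between detected centers
                         100,             // param1: upper Canny threshold
                         30,              // param2: accumulator threshold; lower finds more (possibly false) circles
                         1, 30);          // minRadius, maxRadius
        return circles;
    }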
diff --git a/samples/cpp/tutorial_code/ImgTrans/houghlines.cpp b/samples/cpp/tutorial_code/ImgTrans/houghlines.cpp
new file mode 100644 (file)
index 0000000..57f64c6
--- /dev/null
@@ -0,0 +1,89 @@
+/**
+ * @file houghlines.cpp
+ * @brief This program demonstrates line finding with the Hough transform
+ */
+
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+
+using namespace cv;
+using namespace std;
+
+int main(int argc, char** argv)
+{
+    // Declare the output variables
+    Mat dst, cdst, cdstP;
+
+    //![load]
+    const char* default_file = "../../../data/sudoku.png";
+    const char* filename = argc >=2 ? argv[1] : default_file;
+
+    // Loads an image
+    Mat src = imread( filename, IMREAD_GRAYSCALE );
+
+    // Check if image is loaded fine
+    if(src.empty()){
+        printf(" Error opening image\n");
+        printf(" Program Arguments: [image_name -- default %s] \n", default_file);
+        return -1;
+    }
+    //![load]
+
+    //![edge_detection]
+    // Edge detection
+    Canny(src, dst, 50, 200, 3);
+    //![edge_detection]
+
+    // Copy edges to the images that will display the results in BGR
+    cvtColor(dst, cdst, COLOR_GRAY2BGR);
+    cdstP = cdst.clone();
+
+    //![hough_lines]
+    // Standard Hough Line Transform
+    vector<Vec2f> lines; // will hold the results of the detection
+    HoughLines(dst, lines, 1, CV_PI/180, 150, 0, 0 ); // runs the actual detection
+    //![hough_lines]
+    //![draw_lines]
+    // Draw the lines
+    for( size_t i = 0; i < lines.size(); i++ )
+    {
+        float rho = lines[i][0], theta = lines[i][1];
+        Point pt1, pt2;
+        double a = cos(theta), b = sin(theta);
+        double x0 = a*rho, y0 = b*rho;
+        pt1.x = cvRound(x0 + 1000*(-b));
+        pt1.y = cvRound(y0 + 1000*(a));
+        pt2.x = cvRound(x0 - 1000*(-b));
+        pt2.y = cvRound(y0 - 1000*(a));
+        line( cdst, pt1, pt2, Scalar(0,0,255), 3, LINE_AA);
+    }
+    //![draw_lines]
+
+    //![hough_lines_p]
+    // Probabilistic Line Transform
+    vector<Vec4i> linesP; // will hold the results of the detection
+    HoughLinesP(dst, linesP, 1, CV_PI/180, 50, 50, 10 ); // runs the actual detection
+    //![hough_lines_p]
+    //![draw_lines_p]
+    // Draw the lines
+    for( size_t i = 0; i < linesP.size(); i++ )
+    {
+        Vec4i l = linesP[i];
+        line( cdstP, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0,0,255), 3, LINE_AA);
+    }
+    //![draw_lines_p]
+
+    //![imshow]
+    // Show results
+    imshow("Source", src);
+    imshow("Detected Lines (in red) - Standard Hough Line Transform", cdst);
+    imshow("Detected Lines (in red) - Probabilistic Line Transform", cdstP);
+    //![imshow]
+
+    //![exit]
+    // Wait and Exit
+    waitKey();
+    return 0;
+    //![exit]
+}
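The (rho, theta) pairs returned by the standard transform describe lines x*cos(theta) + y*sin(theta) = rho. The drawing loop above converts each pair to two distant endpoints; the same conversion isolated as a sketch (function name is mine):

    #include <cmath>
    #include "opencv2/core.hpp"

    void lineEndpoints(float rho, float theta, cv::Point& pt1, cv::Point& pt2)
    {
        double a = std::cos(theta), b = std::sin(theta);
        double x0 = a * rho, y0 = b * rho;   // closest point on the line to the origin
        // (-b, a) is the line's direction; step far in both directions to span the image.
        pt1 = cv::Point(cvRound(x0 + 1000 * (-b)), cvRound(y0 + 1000 * a));
        pt2 = cv::Point(cvRound(x0 - 1000 * (-b)), cvRound(y0 - 1000 * a));
    }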
diff --git a/samples/cpp/tutorial_code/viz/histo3D.cpp b/samples/cpp/tutorial_code/viz/histo3D.cpp
new file mode 100644 (file)
index 0000000..1b15737
--- /dev/null
@@ -0,0 +1,184 @@
+#include <opencv2/opencv.hpp>
+#include <iostream>
+
+using namespace std;
+using namespace cv;
+
+#ifdef HAVE_OPENCV_VIZ
+
+const String keys =
+"{Aide h usage ? help  |     | print this message   }"
+"{@arg1                |     | Full path to color imag (3 channels)}"
+;
+
+
+struct Histo3DData {
+    Mat histogram;
+    int seuil;
+    double threshold;
+    Ptr<viz::Viz3d> fen3D;
+    int nbWidget;
+    bool status;
+    double maxH;
+    int code;
+};
+
+void DrawHistogram3D(Histo3DData &);
+void AddSlidebar(String sliderName, String windowName, int sliderMin, int sliderMax, int defaultSlider, int *sliderVal, void(*f)(int, void *), void *r);
+void UpdateThreshold(int , void * r);
+void  KeyboardViz3d(const viz::KeyboardEvent &w, void *t);
+
+
+void DrawHistogram3D(Histo3DData &h)
+{
+    //! [get_cube_size]
+    int planSize = h.histogram.step1(0);
+    int cols = h.histogram.step1(1);
+    int rows = planSize / cols;
+    int plans = h.histogram.total() / planSize;
+    h.fen3D->removeAllWidgets();
+    h.nbWidget=0;
+    if (h.nbWidget==0)
+        h.fen3D->showWidget("Axis", viz::WCoordinateSystem(10));
+    //! [get_cube_size]
+    //! [get_cube_values]
+    for (int k = 0; k < plans; k++)
+    {
+        for (int i = 0; i < rows; i++)
+        {
+            for (int j = 0; j < cols; j++)
+            {
+                double x = h.histogram.at<float>(k, i, j);
+                if (x >= h.threshold)
+                {
+                    double r=std::max(x/h.maxH,0.1);
+                    viz::WCube s(Point3d(k - r / 2, i - r / 2, j - r / 2), Point3d(k + r / 2, i + r / 2, j + r / 2), false, viz::Color(j / double(plans) * 255, i / double(rows) * 255, k / double(cols) * 255));
+                    h.fen3D->showWidget(format("I3d%d", h.nbWidget++), s);
+                }
+            }
+        }
+    }
+    //! [get_cube_values]
+    h.status = false;
+}
+//! [viz_keyboard_callback]
+void  KeyboardViz3d(const viz::KeyboardEvent &w, void *t)
+{
+   Histo3DData *x=(Histo3DData *)t;
+   if (w.action)
+       cout << "you pressed "<< w.symbol<< " in viz window "<<x->fen3D->getWindowName()<<"\n";
+   x->code= w.code;
+   switch (w.code) {
+   case '/':
+       x->status = true;
+       x->threshold *= 0.9;
+       break;
+   case '*':
+       x->status = true;
+       x->threshold *= 1.1;
+       break;
+   }
+   if (x->status)
+   {
+       cout <<  x->threshold << "\n";
+       DrawHistogram3D(*x);
+   }
+}
+//! [viz_keyboard_callback]
+
+
+void AddSlidebar(String sliderName, String windowName, int sliderMin, int sliderMax, int defaultSlider, int *sliderVal, void(*f)(int, void *), void *r)
+{
+    createTrackbar(sliderName, windowName, sliderVal, 1, f, r);
+    setTrackbarMin(sliderName, windowName, sliderMin);
+    setTrackbarMax(sliderName, windowName, sliderMax);
+    setTrackbarPos(sliderName, windowName, defaultSlider);
+}
+
+
+void UpdateThreshold(int , void * r)
+{
+    Histo3DData *h = (Histo3DData *)r;
+    h->status=true;
+    h->threshold = h->seuil/1000000.0;
+    cout<<"Widget : "<<h->nbWidget<<","<< h->threshold<<"\n";
+}
+
+int main (int argc,char **argv)
+{
+    //! [command_line_parser]
+    CommandLineParser parser(argc, argv, keys);
+
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+    String nomFic = parser.get<String>(0);
+    Mat img;
+    if (nomFic.length() != 0)
+    {
+        img = imread(nomFic, IMREAD_COLOR);
+        if (img.empty())
+        {
+            cout << "Image does not exist!";
+            return 0;
+        }
+    }
+    //! [command_line_parser]
+    //! [synthetic_image]
+    else
+    {
+        img = Mat(512,512,CV_8UC3);
+        parser.printMessage();
+        RNG r;
+        r.fill(img(Rect(0, 0, 256, 256)), RNG::NORMAL, Vec3b(60, 40, 50), Vec3b(10, 5, 20));
+        r.fill(img(Rect(256, 0, 256, 256)), RNG::NORMAL, Vec3b(160, 10, 50), Vec3b(20, 5, 10));
+        r.fill(img(Rect(0, 256, 256, 256)), RNG::NORMAL, Vec3b(90, 100, 50), Vec3b(10, 20, 20));
+        r.fill(img(Rect(256, 256, 256, 256)), RNG::NORMAL, Vec3b(100, 10, 150), Vec3b(10, 5, 40));
+    }
+    //! [synthetic_image]
+    //! [calchist_for_histo3d]
+    Histo3DData h;
+    h.status=true;
+    h.seuil=90;
+    h.threshold= h.seuil/1000000.0;
+    float hRange[] = { 0, 256 };
+    const float* etendu[] = { hRange, hRange,hRange };
+    int hBins = 32;
+    int histSize[] = { hBins, hBins , hBins  };
+    int channel[] = { 2, 1,0 };
+    calcHist(&img, 1, channel, Mat(), h.histogram, 3, histSize, etendu, true, false);
+    normalize(h.histogram, h.histogram, 100.0/(img.total()), 0, NORM_MINMAX, -1, Mat());
+    minMaxIdx(h.histogram,NULL,&h.maxH,NULL,NULL);
+    //! [calchist_for_histo3d]
+    //! [slide_bar_for_thresh]
+    namedWindow("Image");
+    imshow("Image",img);
+    AddSlidebar("threshold","Image",0,100,h.seuil,&h.seuil, UpdateThreshold,&h);
+    waitKey(30);
+    //! [slide_bar_for_thresh]
+    //! [manage_viz_imshow_window]
+    h.fen3D = new viz::Viz3d("3D Histogram");
+    h.nbWidget=0;
+    h.fen3D->registerKeyboardCallback(KeyboardViz3d,&h);
+    DrawHistogram3D(h);
+    while (h.code!=27)
+    {
+        h.fen3D->spinOnce(1);
+        if (h.status)
+            DrawHistogram3D(h);
+        if (h.code!=27)
+            h.code= waitKey(30);
+    }
+    //! [manage_viz_imshow_window]
+    return 0;
+}
+#else
+
+int main(int argc, char **argv)
+{
+cout << " you need VIZ module\n";
+return 0;
+}
+#endif
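The histogram the sample renders is one 32x32x32 cube over the B, G and R channels. The calcHist call reduced to its essentials, mirroring the sample's settings (a sketch; function name is mine):

    #include "opencv2/imgproc.hpp"

    cv::Mat colourHistogram3D(const cv::Mat& bgr)
    {
        cv::Mat hist;
        int bins[] = { 32, 32, 32 };
        float range[] = { 0, 256 };
        const float* ranges[] = { range, range, range };
        int channels[] = { 2, 1, 0 };   // R, G, B plane order, as in the sample
        cv::calcHist(&bgr, 1, channels, cv::Mat(), hist, 3, bins, ranges, true, false);
        return hist;                    // CV_32F, three dimensions
    }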
diff --git a/samples/data/ellipses.jpg b/samples/data/ellipses.jpg
new file mode 100644 (file)
index 0000000..def6f7b
Binary files /dev/null and b/samples/data/ellipses.jpg differ
diff --git a/samples/data/smarties.png b/samples/data/smarties.png
new file mode 100644 (file)
index 0000000..90bf22d
Binary files /dev/null and b/samples/data/smarties.png differ
index 256bcb4..f40b0d7 100644 (file)
@@ -91,21 +91,26 @@ int main(int argc, char **argv)
     String modelBin = "bvlc_googlenet.caffemodel";
     String imageFile = (argc > 1) ? argv[1] : "space_shuttle.jpg";
 
-    //! [Read and initialize network]
-    Net net = dnn::readNetFromCaffe(modelTxt, modelBin);
-    //! [Read and initialize network]
-
-    //! [Check that network was read successfully]
-    if (net.empty())
-    {
-        std::cerr << "Can't load network by using the following files: " << std::endl;
-        std::cerr << "prototxt:   " << modelTxt << std::endl;
-        std::cerr << "caffemodel: " << modelBin << std::endl;
-        std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
-        std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
-        exit(-1);
+    Net net;
+    try {
+        //! [Read and initialize network]
+        net = dnn::readNetFromCaffe(modelTxt, modelBin);
+        //! [Read and initialize network]
+    }
+    catch (cv::Exception& e) {
+        std::cerr << "Exception: " << e.what() << std::endl;
+        //! [Check that network was read successfully]
+        if (net.empty())
+        {
+            std::cerr << "Can't load network by using the following files: " << std::endl;
+            std::cerr << "prototxt:   " << modelTxt << std::endl;
+            std::cerr << "caffemodel: " << modelBin << std::endl;
+            std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
+            std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
+            exit(-1);
+        }
+        //! [Check that network was read successfully]
     }
-    //! [Check that network was read successfully]
 
     //! [Prepare blob]
     Mat img = imread(imageFile);
@@ -115,9 +120,9 @@ int main(int argc, char **argv)
         exit(-1);
     }
 
-    //GoogLeNet accepts only 224x224 RGB-images
-    Mat inputBlob = blobFromImage(img, 1, Size(224, 224),
-                                  Scalar(104, 117, 123));   //Convert Mat to batch of images
+    //GoogLeNet accepts only 224x224 BGR-images
+    Mat inputBlob = blobFromImage(img, 1.0f, Size(224, 224),
+                                  Scalar(104, 117, 123), false);   //Convert Mat to batch of images
     //! [Prepare blob]
 
     Mat prob;
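The swapRB=false added above matters because imread returns BGR and this Caffe GoogLeNet was trained on BGR data, so no channel swap is wanted. The blob preparation in isolation (a sketch; function name is mine):

    #include "opencv2/dnn.hpp"

    cv::Mat prepareBlob(const cv::Mat& img)
    {
        // 224x224, mean-subtracted, channels kept in BGR order (swapRB = false).
        return cv::dnn::blobFromImage(img, 1.0, cv::Size(224, 224),
                                      cv::Scalar(104, 117, 123), false);
    }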
diff --git a/samples/dnn/colorization.py b/samples/dnn/colorization.py
new file mode 100644 (file)
index 0000000..3f5ff3b
--- /dev/null
@@ -0,0 +1,67 @@
+# This script is based on https://github.com/richzhang/colorization/colorize.py
+import numpy as np
+import argparse
+import cv2 as cv
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='iColor: deep interactive colorization')
+    parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
+    parser.add_argument('--prototxt', help='Path to colorization_deploy_v2.prototxt', default='./models/colorization_deploy_v2.prototxt')
+    parser.add_argument('--caffemodel', help='Path to colorization_release_v2.caffemodel', default='./models/colorization_release_v2.caffemodel')
+    parser.add_argument('--kernel', help='Path to pts_in_hull.npy', default='./resources/pts_in_hull.npy')
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == '__main__':
+    W_in = 224
+    H_in = 224
+    imshowSize = (640, 480)
+
+    args = parse_args()
+
+    # Select desired model
+    net = cv.dnn.readNetFromCaffe(args.prototxt, args.caffemodel)
+
+    pts_in_hull = np.load(args.kernel) # load cluster centers
+
+    # populate cluster centers as 1x1 convolution kernel
+    pts_in_hull = pts_in_hull.transpose().reshape(2, 313, 1, 1)
+    net.getLayer(net.getLayerId('class8_ab')).blobs = [pts_in_hull.astype(np.float32)]
+    net.getLayer(net.getLayerId('conv8_313_rh')).blobs = [np.full([1, 313], 2.606, np.float32)]
+
+    if args.input:
+        cap = cv.VideoCapture(args.input)
+    else:
+        cap = cv.VideoCapture(0)
+
+    while cv.waitKey(1) < 0:
+        hasFrame, frame = cap.read()
+        if not hasFrame:
+            cv.waitKey()
+            break
+
+        img_rgb = (frame[:,:,[2, 1, 0]] * 1.0 / 255).astype(np.float32)
+
+        img_lab = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab)
+        img_l = img_lab[:,:,0] # pull out L channel
+        (H_orig,W_orig) = img_rgb.shape[:2] # original image size
+
+        # resize image to network input size
+        img_rs = cv.resize(img_rgb, (W_in, H_in)) # resize image to network input size
+        img_lab_rs = cv.cvtColor(img_rs, cv.COLOR_RGB2Lab)
+        img_l_rs = img_lab_rs[:,:,0]
+        img_l_rs -= 50 # subtract 50 for mean-centering
+
+        net.setInput(cv.dnn.blobFromImage(img_l_rs))
+        ab_dec = net.forward('class8_ab')[0,:,:,:].transpose((1,2,0)) # this is our result
+
+        (H_out,W_out) = ab_dec.shape[:2]
+        ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig))
+        img_lab_out = np.concatenate((img_l[:,:,np.newaxis],ab_dec_us),axis=2) # concatenate with original image L
+        img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1)
+
+        frame = cv.resize(frame, imshowSize)
+        cv.imshow('origin', frame)
+        cv.imshow('gray', cv.cvtColor(frame, cv.COLOR_BGR2GRAY))
+        cv.imshow('colorized', cv.resize(img_bgr_out, imshowSize))
index 94ff267..f81fae9 100644 (file)
@@ -113,8 +113,8 @@ int main(int argc, char **argv)
         exit(-1);
     }
 
-    resize(img, img, Size(500, 500));       //FCN accepts 500x500 RGB-images
-    Mat inputBlob = blobFromImage(img);   //Convert Mat to batch of images
+    resize(img, img, Size(500, 500));       //FCN accepts 500x500 BGR-images
+    Mat inputBlob = blobFromImage(img, 1, Size(), Scalar(), false);   //Convert Mat to batch of images
     //! [Prepare blob]
 
     //! [Set input blob]
index 1db2cbf..0a5caaa 100644 (file)
@@ -11,7 +11,7 @@ def get_class_list():
     with open('synset_words.txt', 'rt') as f:
         return [x[x.find(" ") + 1:] for x in f]
 
-blob = dnn.blobFromImage(cv2.imread('space_shuttle.jpg'), 1, (224, 224), (104, 117, 123))
+blob = dnn.blobFromImage(cv2.imread('space_shuttle.jpg'), 1, (224, 224), (104, 117, 123), False)
 print("Input:", blob.shape, blob.dtype)
 
 net = dnn.readNetFromCaffe('bvlc_googlenet.prototxt', 'bvlc_googlenet.caffemodel')
diff --git a/samples/dnn/mobilenet_ssd_accuracy.py b/samples/dnn/mobilenet_ssd_accuracy.py
new file mode 100644 (file)
index 0000000..378d2fe
--- /dev/null
@@ -0,0 +1,131 @@
+# Script to evaluate MobileNet-SSD object detection model trained in TensorFlow
+# using both TensorFlow and OpenCV. Example:
+#
+# python mobilenet_ssd_accuracy.py \
+#   --weights=frozen_inference_graph.pb \
+#   --prototxt=ssd_mobilenet_v1_coco.pbtxt \
+#   --images=val2017 \
+#   --annotations=annotations/instances_val2017.json
+#
+# Tested on COCO 2017 object detection dataset, http://cocodataset.org/#download
+import os
+import cv2 as cv
+import json
+import argparse
+
+parser = argparse.ArgumentParser(
+    description='Evaluate MobileNet-SSD model using both TensorFlow and OpenCV. '
+                'COCO evaluation framework is required: http://cocodataset.org')
+parser.add_argument('--weights', required=True,
+                    help='Path to frozen_inference_graph.pb of MobileNet-SSD model. '
+                         'Download it at https://github.com/tensorflow/models/tree/master/research/object_detection')
+parser.add_argument('--prototxt', help='Path to ssd_mobilenet_v1_coco.pbtxt from opencv_extra.', required=True)
+parser.add_argument('--images', help='Path to COCO validation images directory.', required=True)
+parser.add_argument('--annotations', help='Path to COCO annotations file.', required=True)
+args = parser.parse_args()
+
+### Get OpenCV predictions #####################################################
+net = cv.dnn.readNetFromTensorflow(args.weights, args.prototxt)
+
+detections = []
+for imgName in os.listdir(args.images):
+    inp = cv.imread(os.path.join(args.images, imgName))
+    rows = inp.shape[0]
+    cols = inp.shape[1]
+    inp = cv.resize(inp, (300, 300))
+
+    net.setInput(cv.dnn.blobFromImage(inp, 1.0/127.5, (300, 300), (127.5, 127.5, 127.5), True))
+    out = net.forward()
+
+    for i in range(out.shape[2]):
+        score = float(out[0, 0, i, 2])
+        # Confidence threshold is in prototxt.
+        classId = int(out[0, 0, i, 1])
+
+        x = out[0, 0, i, 3] * cols
+        y = out[0, 0, i, 4] * rows
+        w = out[0, 0, i, 5] * cols - x
+        h = out[0, 0, i, 6] * rows - y
+        detections.append({
+          "image_id": int(imgName[:imgName.rfind('.')]),
+          "category_id": classId,
+          "bbox": [x, y, w, h],
+          "score": score
+        })
+
+with open('cv_result.json', 'wt') as f:
+    json.dump(detections, f)
+
+### Get TensorFlow predictions #################################################
+import tensorflow as tf
+
+with tf.gfile.FastGFile(args.weights, 'rb') as f:
+    # Load the model
+    graph_def = tf.GraphDef()
+    graph_def.ParseFromString(f.read())
+
+with tf.Session() as sess:
+    # Restore session
+    sess.graph.as_default()
+    tf.import_graph_def(graph_def, name='')
+
+    detections = []
+    for imgName in os.listdir(args.images):
+        inp = cv.imread(os.path.join(args.images, imgName))
+        rows = inp.shape[0]
+        cols = inp.shape[1]
+        inp = cv.resize(inp, (300, 300))
+        inp = inp[:, :, [2, 1, 0]]  # BGR2RGB
+        out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
+                        sess.graph.get_tensor_by_name('detection_scores:0'),
+                        sess.graph.get_tensor_by_name('detection_boxes:0'),
+                        sess.graph.get_tensor_by_name('detection_classes:0')],
+                       feed_dict={'image_tensor:0': inp.reshape(1, inp.shape[0], inp.shape[1], 3)})
+        num_detections = int(out[0][0])
+        for i in range(num_detections):
+            classId = int(out[3][0][i])
+            score = float(out[1][0][i])
+            bbox = [float(v) for v in out[2][0][i]]
+            if score > 0.01:
+                x = bbox[1] * cols
+                y = bbox[0] * rows
+                w = bbox[3] * cols - x
+                h = bbox[2] * rows - y
+                detections.append({
+                  "image_id": int(imgName[:imgName.rfind('.')]),
+                  "category_id": classId,
+                  "bbox": [x, y, w, h],
+                  "score": score
+                })
+
+with open('tf_result.json', 'wt') as f:
+    json.dump(detections, f)
+
+### Evaluation part ############################################################
+
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+annType = 'bbox'  # one of 'segm', 'bbox', 'keypoints'
+print('Running demo for *%s* results.' % annType)
+
+#initialize COCO ground truth api
+cocoGt=COCO(args.annotations)
+
+#initialize COCO detections api
+for resFile in ['tf_result.json', 'cv_result.json']:
+    print(resFile)
+    cocoDt=cocoGt.loadRes(resFile)
+
+    cocoEval = COCOeval(cocoGt,cocoDt,annType)
+    cocoEval.evaluate()
+    cocoEval.accumulate()
+    cocoEval.summarize()
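
Both detection loops above decode rows of the SSD output blob, whose last axis is assumed to hold [batch_id, class_id, confidence, left, top, right, bottom] with coordinates normalized to [0, 1]. A small decoding sketch under that assumption:

    import numpy as np

    def decode_ssd_row(row, cols, rows):
        # row = [batch_id, class_id, score, x1, y1, x2, y2], normalized coords
        class_id = int(row[1])
        score = float(row[2])
        x = row[3] * cols
        y = row[4] * rows
        w = row[5] * cols - x
        h = row[6] * rows - y
        return class_id, score, [x, y, w, h]  # COCO-style [x, y, width, height]

    print(decode_ssd_row(np.float32([0, 1, 0.9, 0.1, 0.2, 0.5, 0.6]), 640, 480))
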
index 7a0c5ff..f031a7c 100644 (file)
@@ -1,3 +1,14 @@
+# This script is used to demonstrate MobileNet-SSD network using OpenCV deep learning module.
+#
+# It works with model taken from https://github.com/chuanqi305/MobileNet-SSD/ that
+# was trained in Caffe-SSD framework, https://github.com/weiliu89/caffe/tree/ssd.
+# Model detects objects from 20 classes.
+#
+# Also TensorFlow model from TensorFlow object detection model zoo may be used to
+# detect objects from 90 classes:
+# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
+# Text graph definition must be taken from opencv_extra:
+# https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/ssd_mobilenet_v1_coco.pbtxt
 import numpy as np
 import argparse
 
@@ -13,27 +24,58 @@ WHRatio = inWidth / float(inHeight)
 inScaleFactor = 0.007843
 meanVal = 127.5
 
-classNames = ('background',
-              'aeroplane', 'bicycle', 'bird', 'boat',
-              'bottle', 'bus', 'car', 'cat', 'chair',
-              'cow', 'diningtable', 'dog', 'horse',
-              'motorbike', 'person', 'pottedplant',
-              'sheep', 'sofa', 'train', 'tvmonitor')
-
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(
+        description='Script to run MobileNet-SSD object detection network '
+                    'trained either in Caffe or TensorFlow frameworks.')
     parser.add_argument("--video", help="path to video file. If empty, camera's stream will be used")
     parser.add_argument("--prototxt", default="MobileNetSSD_deploy.prototxt",
-                        help="path to caffe prototxt")
-    parser.add_argument("-c", "--caffemodel", default="MobileNetSSD_deploy.caffemodel",
-                        help="path to caffemodel file, download it here: "
-                        "https://github.com/chuanqi305/MobileNet-SSD/")
-    parser.add_argument("--thr", default=0.2, help="confidence threshold to filter out weak detections")
+                                      help='Path to text network file: '
+                                           'MobileNetSSD_deploy.prototxt for Caffe model or '
+                                           'ssd_mobilenet_v1_coco.pbtxt from opencv_extra for TensorFlow model')
+    parser.add_argument("--weights", default="MobileNetSSD_deploy.caffemodel",
+                                     help='Path to weights: '
+                                          'MobileNetSSD_deploy.caffemodel for Caffe model or '
+                                          'frozen_inference_graph.pb from TensorFlow.')
+    parser.add_argument("--num_classes", default=20, type=int,
+                        help="Number of classes. It's 20 for Caffe model from "
+                             "https://github.com/chuanqi305/MobileNet-SSD/ and 90 for "
+                             "TensorFlow model from https://github.com/tensorflow/models/tree/master/research/object_detection")
+    parser.add_argument("--thr", default=0.2, type=float, help="confidence threshold to filter out weak detections")
     args = parser.parse_args()
 
-    net = cv.dnn.readNetFromCaffe(args.prototxt, args.caffemodel)
-
-    if len(args.video):
+    if args.num_classes == 20:
+        net = cv.dnn.readNetFromCaffe(args.prototxt, args.weights)
+        swapRB = False
+        classNames = { 0: 'background',
+            1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat',
+            5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair',
+            10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse',
+            14: 'motorbike', 15: 'person', 16: 'pottedplant',
+            17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor' }
+    else:
+        assert(args.num_classes == 90)
+        net = cv.dnn.readNetFromTensorflow(args.weights, args.prototxt)
+        swapRB = True
+        classNames = { 0: 'background',
+            1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus',
+            7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant',
+            13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat',
+            18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear',
+            24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag',
+            32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard',
+            37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove',
+            41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle',
+            46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon',
+            51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange',
+            56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut',
+            61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed',
+            67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse',
+            75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave', 79: 'oven',
+            80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book', 85: 'clock',
+            86: 'vase', 87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush' }
+
+    if args.video:
         cap = cv.VideoCapture(args.video)
     else:
         cap = cv.VideoCapture(0)
@@ -41,7 +83,7 @@ if __name__ == "__main__":
     while True:
         # Capture frame-by-frame
         ret, frame = cap.read()
-        blob = cv.dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), meanVal)
+        blob = cv.dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), (meanVal, meanVal, meanVal), swapRB)
         net.setInput(blob)
         detections = net.forward()
 
@@ -74,14 +116,16 @@ if __name__ == "__main__":
 
                 cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                               (0, 255, 0))
-                label = classNames[class_id] + ": " + str(confidence)
-                labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
-
-                cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
-                                     (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
-                                     (255, 255, 255), cv.FILLED)
-                cv.putText(frame, label, (xLeftBottom, yLeftBottom),
-                            cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
+                if class_id in classNames:
+                    label = classNames[class_id] + ": " + str(confidence)
+                    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+
+                    yLeftBottom = max(yLeftBottom, labelSize[1])
+                    cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
+                                         (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
+                                         (255, 255, 255), cv.FILLED)
+                    cv.putText(frame, label, (xLeftBottom, yLeftBottom),
+                                cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
 
         cv.imshow("detections", frame)
         if cv.waitKey(1) >= 0:
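
The 90-class COCO label map above is deliberately sparse (ids 12, 26, 29, 30, ... are unused), which is why the drawing code now guards with the class_id membership test. A tiny sketch of the same lookup with a fallback, using an illustrative subset of the map:

    classNames = {1: 'person', 2: 'bicycle', 3: 'car'}  # illustrative subset of the sparse map

    def label_for(class_id, confidence):
        name = classNames.get(class_id)   # None for ids absent from the map
        if name is None:
            return None                   # skip drawing, as the sample does
        return '%s: %.2f' % (name, confidence)

    print(label_for(2, 0.87))   # 'bicycle: 0.87'
    print(label_for(12, 0.50))  # None -- id 12 is not in the COCO label map
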
index d8a102a..172ee23 100644 (file)
@@ -27,8 +27,7 @@ if __name__ == '__main__':
         cols = frame.shape[1]
         rows = frame.shape[0]
 
-        net.setInput(dnn.blobFromImage(cv.resize(frame, (inWidth, inHeight)),
-                                       1.0, (inWidth, inHeight), (104., 177., 123.)))
+        net.setInput(dnn.blobFromImage(frame, 1.0, (inWidth, inHeight), (104.0, 177.0, 123.0), False))
         detections = net.forward()
 
         perf_stats = net.getPerfProfile()
diff --git a/samples/dnn/shrink_tf_graph_weights.py b/samples/dnn/shrink_tf_graph_weights.py
new file mode 100644 (file)
index 0000000..799d6c7
--- /dev/null
@@ -0,0 +1,62 @@
+# This file is part of OpenCV project.
+# It is subject to the license terms in the LICENSE file found in the top-level directory
+# of this distribution and at http://opencv.org/license.html.
+#
+# Copyright (C) 2017, Intel Corporation, all rights reserved.
+# Third party copyrights are property of their respective owners.
+import tensorflow as tf
+import struct
+import argparse
+import numpy as np
+
+parser = argparse.ArgumentParser(description='Convert weights of a frozen TensorFlow graph to fp16.')
+parser.add_argument('--input', required=True, help='Path to frozen graph.')
+parser.add_argument('--output', required=True, help='Path to output graph.')
+parser.add_argument('--ops', default=['Conv2D', 'MatMul'], nargs='+',
+                    help='List of ops which weights are converted.')
+args = parser.parse_args()
+
+DT_FLOAT = 1
+DT_HALF = 19
+
+# In frozen graphs, every node that uses weights is connected to its Const node
+# through an Identity node, usually named the same way with a '/read' suffix.
+# We'll replace all of these Identity nodes with Cast nodes.
+
+# Load the model
+with tf.gfile.FastGFile(args.input, 'rb') as f:
+    graph_def = tf.GraphDef()
+    graph_def.ParseFromString(f.read())
+
+# Collect all inputs of the desired nodes.
+inputs = []
+for node in graph_def.node:
+    if node.op in args.ops:
+        inputs += node.input
+
+weightsNodes = []
+for node in graph_def.node:
+    # From all collected inputs we keep only the Identity nodes.
+    if node.name in inputs and node.op == 'Identity' and node.attr['T'].type == DT_FLOAT:
+        weightsNodes.append(node.input[0])
+
+        # Replace Identity with Cast.
+        node.op = 'Cast'
+        node.attr['DstT'].type = DT_FLOAT
+        node.attr['SrcT'].type = DT_HALF
+        del node.attr['T']
+        del node.attr['_class']
+
+# Convert weights to half precision.
+for node in graph_def.node:
+    if node.name in weightsNodes:
+        node.attr['dtype'].type = DT_HALF
+        node.attr['value'].tensor.dtype = DT_HALF
+
+        floats = node.attr['value'].tensor.tensor_content
+
+        floats = struct.unpack('f' * (len(floats) // 4), floats)
+        halfs = np.array(floats).astype(np.float16).view(np.uint16)
+        node.attr['value'].tensor.tensor_content = struct.pack('H' * len(halfs), *halfs)
+
+tf.train.write_graph(graph_def, "", args.output, as_text=False)
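
The script above shrinks a frozen graph by re-packing float32 tensor bytes as IEEE half precision and letting the inserted Cast nodes convert back at load time. A standalone round trip of the same struct/NumPy idiom (values are illustrative):

    import struct
    import numpy as np

    raw = struct.pack('f' * 3, 0.1, 1.5, -2.0)            # simulated fp32 tensor_content
    floats = struct.unpack('f' * (len(raw) // 4), raw)    # 4 bytes per float32
    halfs = np.array(floats).astype(np.float16).view(np.uint16)
    packed = struct.pack('H' * len(halfs), *halfs)        # 2 bytes per float16
    print(np.frombuffer(packed, dtype=np.float16))        # [ 0.1  1.5 -2. ] up to fp16 rounding
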
index 3d785f5..283c08a 100644 (file)
@@ -97,7 +97,7 @@ int main(int argc, char** argv)
         //! [Prepare blob]
 
         Mat inputBlob = blobFromImage(frame, inScaleFactor,
-                                      Size(inWidth, inHeight), meanVal); //Convert Mat to batch of images
+                                      Size(inWidth, inHeight), meanVal, false); //Convert Mat to batch of images
         //! [Prepare blob]
 
         //! [Set input blob]
index 214dd91..09e983f 100644 (file)
@@ -86,7 +86,7 @@ int main(int argc, char** argv)
     //! [Prepare blob]
     Mat preprocessedFrame = preprocess(frame);
 
-    Mat inputBlob = blobFromImage(preprocessedFrame); //Convert Mat to batch of images
+    Mat inputBlob = blobFromImage(preprocessedFrame, 1.0f, Size(), Scalar(), false); //Convert Mat to batch of images
     //! [Prepare blob]
 
     //! [Set input blob]
diff --git a/samples/java/tutorial_code/ImgProc/HitMiss/HitMiss.java b/samples/java/tutorial_code/ImgProc/HitMiss/HitMiss.java
new file mode 100644 (file)
index 0000000..806537f
--- /dev/null
@@ -0,0 +1,58 @@
+import org.opencv.core.*;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgproc.Imgproc;
+
+class HitMissRun{
+
+    public void run() {
+        Mat input_image = new Mat( 8, 8, CvType.CV_8UC1 );
+        int row = 0, col = 0;
+        input_image.put(row, col,
+                0, 0, 0, 0, 0, 0, 0, 0,
+                0, 255, 255, 255, 0, 0, 0, 255,
+                0, 255, 255, 255, 0, 0, 0, 0,
+                0, 255, 255, 255, 0, 255, 0, 0,
+                0, 0, 255, 0, 0, 0, 0, 0,
+                0, 0, 255, 0, 0, 255, 255, 0,
+                0, 255, 0, 255, 0, 0, 255, 0,
+                0, 255, 255, 255, 0, 0, 0, 0);
+
+        Mat kernel = new Mat( 3, 3, CvType.CV_16S );
+        kernel.put(row, col,
+                0, 1, 0,
+                1, -1, 1,
+                0, 1, 0 );
+
+        Mat output_image = new Mat();
+        Imgproc.morphologyEx(input_image, output_image, Imgproc.MORPH_HITMISS, kernel);
+
+        int rate = 50;
+        Core.add(kernel, new Scalar(1), kernel);
+        Core.multiply(kernel, new Scalar(127), kernel);
+        kernel.convertTo(kernel, CvType.CV_8U);
+
+        Imgproc.resize(kernel, kernel, new Size(), rate, rate, Imgproc.INTER_NEAREST);
+        HighGui.imshow("kernel", kernel);
+        HighGui.moveWindow("kernel", 0, 0);
+
+        Imgproc.resize(input_image, input_image, new Size(), rate, rate, Imgproc.INTER_NEAREST);
+        HighGui.imshow("Original", input_image);
+        HighGui.moveWindow("Original", 0, 200);
+
+        Imgproc.resize(output_image, output_image, new Size(), rate, rate, Imgproc.INTER_NEAREST);
+        HighGui.imshow("Hit or Miss", output_image);
+        HighGui.moveWindow("Hit or Miss", 500, 200);
+
+        HighGui.waitKey(0);
+        System.exit(0);
+    }
+}
+
+public class HitMiss
+{
+    public static void main(String[] args) {
+        // load the native OpenCV library
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new HitMissRun().run();
+    }
+}
diff --git a/samples/java/tutorial_code/ImgProc/Pyramids/Pyramids.java b/samples/java/tutorial_code/ImgProc/Pyramids/Pyramids.java
new file mode 100644 (file)
index 0000000..b4ada16
--- /dev/null
@@ -0,0 +1,67 @@
+import org.opencv.core.*;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+
+class PyramidsRun {
+
+    String window_name = "Pyramids Demo";
+
+    public void run(String[] args) {
+        /// General instructions
+        System.out.println("\n" +
+                " Zoom In-Out demo    \n" +
+                "------------------   \n" +
+                " * [i] -> Zoom [i]n  \n" +
+                " * [o] -> Zoom [o]ut \n" +
+                " * [ESC] -> Close program \n");
+
+        //! [load]
+        String filename = ((args.length > 0) ? args[0] : "../data/chicky_512.png");
+
+        // Load the image
+        Mat src = Imgcodecs.imread(filename);
+
+        // Check if image is loaded fine
+        if( src.empty() ) {
+            System.out.println("Error opening image!");
+            System.out.println("Program Arguments: [image_name -- default ../data/chicky_512.png] \n");
+            System.exit(-1);
+        }
+        //! [load]
+
+        //! [loop]
+        while (true){
+            //! [show_image]
+            HighGui.imshow( window_name, src );
+            //! [show_image]
+            char c = (char) HighGui.waitKey(0);
+            c = Character.toLowerCase(c);
+
+            if( c == 27 ){
+                break;
+                //![pyrup]
+            }else if( c == 'i'){
+                Imgproc.pyrUp( src, src, new Size( src.cols()*2, src.rows()*2 ) );
+                System.out.println( "** Zoom In: Image x 2" );
+                //![pyrup]
+                //![pyrdown]
+            }else if( c == 'o'){
+                Imgproc.pyrDown( src, src, new Size( src.cols()/2, src.rows()/2 ) );
+                System.out.println( "** Zoom Out: Image / 2" );
+                //![pyrdown]
+            }
+        }
+        //! [loop]
+
+        System.exit(0);
+    }
+}
+
+public class Pyramids {
+    public static void main(String[] args) {
+        // Load the native library.
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new PyramidsRun().run(args);
+    }
+}
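
pyrDown low-pass filters before dropping every other row and column, so pyrUp(pyrDown(img)) is a blurred approximation of the original rather than an exact inverse; the residual is what Laplacian pyramids store. A short sketch, with the sample's default image path assumed:

    import cv2 as cv

    img = cv.imread('../data/chicky_512.png')   # assumed path, as in the sample
    down = cv.pyrDown(img)                      # blur + 2x decimation
    up = cv.pyrUp(down, dstsize=(img.shape[1], img.shape[0]))
    detail = cv.absdiff(img, up)                # detail lost by the down/up round trip
    print(detail.mean())                        # nonzero: pyrUp does not undo pyrDown
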
diff --git a/samples/java/tutorial_code/ImgProc/Smoothing/Smoothing.java b/samples/java/tutorial_code/ImgProc/Smoothing/Smoothing.java
new file mode 100644 (file)
index 0000000..b4d9630
--- /dev/null
@@ -0,0 +1,101 @@
+import org.opencv.core.*;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+
+class SmoothingRun {
+
+    ///  Global Variables
+    int DELAY_CAPTION = 1500;
+    int DELAY_BLUR = 100;
+    int MAX_KERNEL_LENGTH = 31;
+
+    Mat src = new Mat(), dst = new Mat();
+    String windowName = "Filter Demo 1";
+
+    public void run(String[] args) {
+
+        String filename = ((args.length > 0) ? args[0] : "../data/lena.jpg");
+
+        src = Imgcodecs.imread(filename, Imgcodecs.IMREAD_COLOR);
+        if( src.empty() ) {
+            System.out.println("Error opening image");
+            System.out.println("Usage: ./Smoothing [image_name -- default ../data/lena.jpg] \n");
+            System.exit(-1);
+        }
+
+        if( displayCaption( "Original Image" ) != 0 ) { System.exit(0); }
+
+        dst = src.clone();
+        if( displayDst( DELAY_CAPTION ) != 0 ) { System.exit(0); }
+
+        /// Applying Homogeneous blur
+        if( displayCaption( "Homogeneous Blur" ) != 0 ) { System.exit(0); }
+
+        //! [blur]
+        for (int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2) {
+            Imgproc.blur(src, dst, new Size(i, i), new Point(-1, -1));
+            displayDst(DELAY_BLUR);
+        }
+        //! [blur]
+
+        /// Applying Gaussian blur
+        if( displayCaption( "Gaussian Blur" ) != 0 ) { System.exit(0); }
+
+        //! [gaussianblur]
+        for (int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2) {
+            Imgproc.GaussianBlur(src, dst, new Size(i, i), 0, 0);
+            displayDst(DELAY_BLUR);
+        }
+        //! [gaussianblur]
+
+        /// Applying Median blur
+        if( displayCaption( "Median Blur" ) != 0 ) { System.exit(0); }
+
+        //! [medianblur]
+        for (int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2) {
+            Imgproc.medianBlur(src, dst, i);
+            displayDst(DELAY_BLUR);
+        }
+        //! [medianblur]
+
+        /// Applying Bilateral Filter
+        if( displayCaption( "Bilateral Blur" ) != 0 ) { System.exit(0); }
+
+        //![bilateralfilter]
+        for (int i = 1; i < MAX_KERNEL_LENGTH; i = i + 2) {
+            Imgproc.bilateralFilter(src, dst, i, i * 2, i / 2);
+            displayDst(DELAY_BLUR);
+        }
+        //![bilateralfilter]
+
+        /// Done
+        displayCaption( "Done!" );
+
+        System.exit(0);
+    }
+
+    int displayCaption(String caption) {
+        dst = Mat.zeros(src.size(), src.type());
+        Imgproc.putText(dst, caption,
+                new Point(src.cols() / 4, src.rows() / 2),
+                Core.FONT_HERSHEY_COMPLEX, 1, new Scalar(255, 255, 255));
+
+        return displayDst(DELAY_CAPTION);
+    }
+
+    int displayDst(int delay) {
+        HighGui.imshow( windowName, dst );
+        int c = HighGui.waitKey( delay );
+        if (c >= 0) { return -1; }
+        return 0;
+    }
+}
+
+public class Smoothing {
+    public static void main(String[] args) {
+        // Load the native library.
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new SmoothingRun().run(args);
+    }
+}
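
In the bilateral loop above the three parameters are the neighborhood diameter d, sigmaColor (how different two intensities may be and still average together) and sigmaSpace (how far apart two pixels may be and still average together); larger sigmas smooth more, while edges with contrast above the color sigma survive. A minimal sketch with fixed, illustrative values:

    import cv2 as cv

    src = cv.imread('../data/lena.jpg')   # assumed path, as in the sample
    # d=9 neighborhood, sigmaColor=75 intensity range, sigmaSpace=75 spatial falloff
    dst = cv.bilateralFilter(src, 9, 75, 75)
    cv.imshow('bilateral', dst)
    cv.waitKey(0)
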
diff --git a/samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java b/samples/java/tutorial_code/ImgProc/morph_lines_detection/Morphology_3.java
new file mode 100644 (file)
index 0000000..23f7299
--- /dev/null
@@ -0,0 +1,152 @@
+/**
+ * @file Morphology_3.java
+ * @brief Use morphology transformations for extracting horizontal and vertical lines sample code
+ */
+
+import org.opencv.core.*;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+
+class Morphology_3Run {
+
+    public void run(String[] args) {
+
+        //! [load_image]
+        // Check number of arguments
+        if (args.length == 0){
+            System.out.println("Not enough parameters!");
+            System.out.println("Program Arguments: [image_path]");
+            System.exit(-1);
+        }
+
+        // Load the image
+        Mat src = Imgcodecs.imread(args[0]);
+
+        // Check if image is loaded fine
+        if( src.empty() ) {
+            System.out.println("Error opening image: " + args[0]);
+            System.exit(-1);
+        }
+
+        // Show source image
+        HighGui.imshow("src", src);
+        //! [load_image]
+
+        //! [gray]
+        // Transform source image to gray if it is not already
+        Mat gray = new Mat();
+
+        if (src.channels() == 3)
+        {
+            Imgproc.cvtColor(src, gray, Imgproc.COLOR_BGR2GRAY);
+        }
+        else
+        {
+            gray = src;
+        }
+
+        // Show gray image
+        showWaitDestroy("gray" , gray);
+        //! [gray]
+
+        //! [bin]
+        // Apply adaptiveThreshold at the bitwise_not of gray
+        Mat bw = new Mat();
+        Core.bitwise_not(gray, gray);
+        Imgproc.adaptiveThreshold(gray, bw, 255, Imgproc.ADAPTIVE_THRESH_MEAN_C, Imgproc.THRESH_BINARY, 15, -2);
+
+        // Show binary image
+        showWaitDestroy("binary" , bw);
+        //! [bin]
+
+        //! [init]
+        // Create the images that will be used to extract the horizontal and vertical lines
+        Mat horizontal = bw.clone();
+        Mat vertical = bw.clone();
+        //! [init]
+
+        //! [horiz]
+        // Specify size on horizontal axis
+        int horizontal_size = horizontal.cols() / 30;
+
+        // Create structure element for extracting horizontal lines through morphology operations
+        Mat horizontalStructure = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(horizontal_size,1));
+
+        // Apply morphology operations
+        Imgproc.erode(horizontal, horizontal, horizontalStructure);
+        Imgproc.dilate(horizontal, horizontal, horizontalStructure);
+
+        // Show extracted horizontal lines
+        showWaitDestroy("horizontal" , horizontal);
+        //! [horiz]
+
+        //! [vert]
+        // Specify size on vertical axis
+        int vertical_size = vertical.rows() / 30;
+
+        // Create structure element for extracting vertical lines through morphology operations
+        Mat verticalStructure = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size( 1,vertical_size));
+
+        // Apply morphology operations
+        Imgproc.erode(vertical, vertical, verticalStructure);
+        Imgproc.dilate(vertical, vertical, verticalStructure);
+
+        // Show extracted vertical lines
+        showWaitDestroy("vertical", vertical);
+        //! [vert]
+
+        //! [smooth]
+        // Invert vertical image
+        Core.bitwise_not(vertical, vertical);
+        showWaitDestroy("vertical_bit" , vertical);
+
+        // Extract edges and smooth image according to the logic
+        // 1. extract edges
+        // 2. dilate(edges)
+        // 3. src.copyTo(smooth)
+        // 4. blur smooth img
+        // 5. smooth.copyTo(src, edges)
+
+        // Step 1
+        Mat edges = new Mat();
+        Imgproc.adaptiveThreshold(vertical, edges, 255, Imgproc.ADAPTIVE_THRESH_MEAN_C, Imgproc.THRESH_BINARY, 3, -2);
+        showWaitDestroy("edges", edges);
+
+        // Step 2
+        Mat kernel = Mat.ones(2, 2, CvType.CV_8UC1);
+        Imgproc.dilate(edges, edges, kernel);
+        showWaitDestroy("dilate", edges);
+
+        // Step 3
+        Mat smooth = new Mat();
+        vertical.copyTo(smooth);
+
+        // Step 4
+        Imgproc.blur(smooth, smooth, new Size(2, 2));
+
+        // Step 5
+        smooth.copyTo(vertical, edges);
+
+        // Show final result
+        showWaitDestroy("smooth - final", vertical);
+        //! [smooth]
+
+        System.exit(0);
+    }
+
+    private void showWaitDestroy(String winname, Mat img) {
+        HighGui.imshow(winname, img);
+        HighGui.moveWindow(winname, 500, 0);
+        HighGui.waitKey(0);
+        HighGui.destroyWindow(winname);
+    }
+}
+
+public class Morphology_3 {
+    public static void main(String[] args) {
+        // Load the native library.
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new Morphology_3Run().run(args);
+    }
+}
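
The horizontal/vertical extraction above is a morphological opening (erode, then dilate) with an elongated structuring element, which keeps only features at least as long as the element. The two steps collapse into one morphologyEx call; a Python sketch with an assumed input image:

    import cv2 as cv

    gray = cv.imread('notes.png', cv.IMREAD_GRAYSCALE)   # assumed input path
    bw = cv.adaptiveThreshold(cv.bitwise_not(gray), 255,
                              cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, 15, -2)
    horizontal_size = bw.shape[1] // 30
    kernel = cv.getStructuringElement(cv.MORPH_RECT, (horizontal_size, 1))
    horizontal = cv.morphologyEx(bw, cv.MORPH_OPEN, kernel)  # erode + dilate in one call
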
diff --git a/samples/java/tutorial_code/ImgTrans/Filter2D/Filter2D_Demo.java b/samples/java/tutorial_code/ImgTrans/Filter2D/Filter2D_Demo.java
new file mode 100644 (file)
index 0000000..aa685d9
--- /dev/null
@@ -0,0 +1,81 @@
+/**
+ * @file Filter2D_demo.java
+ * @brief Sample code that shows how to implement your own linear filters by using filter2D function
+ */
+
+import org.opencv.core.*;
+import org.opencv.core.Point;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+
+class Filter2D_DemoRun {
+
+    public void run(String[] args) {
+        // Declare variables
+        Mat src, dst = new Mat();
+
+        Mat kernel = new Mat();
+        Point anchor;
+        double delta;
+        int ddepth;
+        int kernel_size;
+        String window_name = "filter2D Demo";
+
+        //! [load]
+        String imageName = ((args.length > 0) ? args[0] : "../data/lena.jpg");
+
+        // Load an image
+        src = Imgcodecs.imread(imageName, Imgcodecs.IMREAD_COLOR);
+
+        // Check if image is loaded fine
+        if( src.empty() ) {
+            System.out.println("Error opening image!");
+            System.out.println("Program Arguments: [image_name -- default ../data/lena.jpg] \n");
+            System.exit(-1);
+        }
+        //! [load]
+
+        //! [init_arguments]
+        // Initialize arguments for the filter
+        anchor = new Point( -1, -1);
+        delta = 0.0;
+        ddepth = -1;
+        //! [init_arguments]
+
+        // Loop - Will filter the image with a different kernel size every 0.5 seconds
+        int ind = 0;
+        while( true )
+        {
+            //! [update_kernel]
+            // Update kernel size for a normalized box filter
+            kernel_size = 3 + 2*( ind%5 );
+            Mat ones = Mat.ones( kernel_size, kernel_size, CvType.CV_32F );
+            Core.multiply(ones, new Scalar(1/(double)(kernel_size*kernel_size)), kernel);
+            //! [update_kernel]
+
+            //! [apply_filter]
+            // Apply filter
+            Imgproc.filter2D(src, dst, ddepth , kernel, anchor, delta, Core.BORDER_DEFAULT );
+            //! [apply_filter]
+            HighGui.imshow( window_name, dst );
+
+            int c = HighGui.waitKey(500);
+            // Press 'ESC' to exit the program
+            if( c == 27 )
+            { break; }
+
+            ind++;
+        }
+
+        System.exit(0);
+    }
+}
+
+public class Filter2D_Demo {
+    public static void main(String[] args) {
+        // Load the native library.
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new Filter2D_DemoRun().run(args);
+    }
+}
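
The kernel built in the loop above (all ones divided by kernel_size squared) is a normalized box filter, so filter2D with it should match blur with the same window up to rounding. A quick check in Python, path assumed as in the sample:

    import cv2 as cv
    import numpy as np

    src = cv.imread('../data/lena.jpg')             # assumed path
    k = 5
    kernel = np.ones((k, k), np.float32) / (k * k)  # normalized box kernel
    via_filter2d = cv.filter2D(src, -1, kernel)     # ddepth=-1: keep source depth
    via_blur = cv.blur(src, (k, k))
    print(cv.absdiff(via_filter2d, via_blur).max()) # 0 or 1 (rounding) everywhere
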
diff --git a/samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java b/samples/java/tutorial_code/ImgTrans/HoughCircle/HoughCircles.java
new file mode 100644 (file)
index 0000000..40f97ff
--- /dev/null
@@ -0,0 +1,77 @@
+package sample;
+/**
+ * @file HoughCircles.java
+ * @brief This program demonstrates circle finding with the Hough transform
+ */
+
+import org.opencv.core.*;
+import org.opencv.core.Point;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+
+class HoughCirclesRun {
+
+    public void run(String[] args) {
+
+        //! [load]
+        String default_file = "../../../../data/smarties.png";
+        String filename = ((args.length > 0) ? args[0] : default_file);
+
+        // Load an image
+        Mat src = Imgcodecs.imread(filename, Imgcodecs.IMREAD_COLOR);
+
+        // Check if image is loaded fine
+        if( src.empty() ) {
+            System.out.println("Error opening image!");
+            System.out.println("Program Arguments: [image_name -- default "
+                    + default_file +"] \n");
+            System.exit(-1);
+        }
+        //! [load]
+
+        //! [convert_to_gray]
+        Mat gray = new Mat();
+        Imgproc.cvtColor(src, gray, Imgproc.COLOR_BGR2GRAY);
+        //! [convert_to_gray]
+
+        //![reduce_noise]
+        Imgproc.medianBlur(gray, gray, 5);
+        //![reduce_noise]
+
+        //! [houghcircles]
+        Mat circles = new Mat();
+        Imgproc.HoughCircles(gray, circles, Imgproc.HOUGH_GRADIENT, 1.0,
+                (double)gray.rows()/16, // change this value to detect circles with different distances to each other
+                100.0, 30.0, 1, 30); // change the last two parameters
+                // (min_radius & max_radius) to detect larger circles
+        //! [houghcircles]
+
+        //! [draw]
+        for (int x = 0; x < circles.cols(); x++) {
+            double[] c = circles.get(0, x);
+            Point center = new Point(Math.round(c[0]), Math.round(c[1]));
+            // circle center
+            Imgproc.circle(src, center, 1, new Scalar(0,100,100), 3, 8, 0 );
+            // circle outline
+            int radius = (int) Math.round(c[2]);
+            Imgproc.circle(src, center, radius, new Scalar(255,0,255), 3, 8, 0 );
+        }
+        //! [draw]
+
+        //! [display]
+        HighGui.imshow("detected circles", src);
+        HighGui.waitKey();
+        //! [display]
+
+        System.exit(0);
+    }
+}
+
+public class HoughCircles {
+    public static void main(String[] args) {
+        // Load the native library.
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new HoughCirclesRun().run(args);
+    }
+}
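
In the HoughCircles call above the arguments after the method are dp (inverse accumulator resolution), the minimum distance between detected centers, param1 (upper Canny threshold) and param2 (accumulator vote threshold -- lower values yield more, possibly spurious, circles), then the radius bounds. The same call in Python with named parameters, input path as in the sample:

    import cv2 as cv

    gray = cv.imread('../../../../data/smarties.png', cv.IMREAD_GRAYSCALE)  # assumed path
    gray = cv.medianBlur(gray, 5)
    circles = cv.HoughCircles(gray, cv.HOUGH_GRADIENT, dp=1,
                              minDist=gray.shape[0] / 16,  # min distance between centers
                              param1=100,                  # upper Canny threshold
                              param2=30,                   # accumulator vote threshold
                              minRadius=1, maxRadius=30)
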
diff --git a/samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java b/samples/java/tutorial_code/ImgTrans/HoughLine/HoughLines.java
new file mode 100644 (file)
index 0000000..c968d32
--- /dev/null
@@ -0,0 +1,96 @@
+/**
+ * @file HoughLines.java
+ * @brief This program demonstrates line finding with the Hough transform
+ */
+
+import org.opencv.core.*;
+import org.opencv.core.Point;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+
+class HoughLinesRun {
+
+    public void run(String[] args) {
+        // Declare the output variables
+        Mat dst = new Mat(), cdst = new Mat(), cdstP;
+
+        //! [load]
+        String default_file = "../../../../data/sudoku.png";
+        String filename = ((args.length > 0) ? args[0] : default_file);
+
+        // Load an image
+        Mat src = Imgcodecs.imread(filename, Imgcodecs.IMREAD_GRAYSCALE);
+
+        // Check if image is loaded fine
+        if( src.empty() ) {
+            System.out.println("Error opening image!");
+            System.out.println("Program Arguments: [image_name -- default "
+                    + default_file +"] \n");
+            System.exit(-1);
+        }
+        //! [load]
+
+        //! [edge_detection]
+        // Edge detection
+        Imgproc.Canny(src, dst, 50, 200, 3, false);
+        //! [edge_detection]
+
+        // Copy edges to the images that will display the results in BGR
+        Imgproc.cvtColor(dst, cdst, Imgproc.COLOR_GRAY2BGR);
+        cdstP = cdst.clone();
+
+        //! [hough_lines]
+        // Standard Hough Line Transform
+        Mat lines = new Mat(); // will hold the results of the detection
+        Imgproc.HoughLines(dst, lines, 1, Math.PI/180, 150); // runs the actual detection
+        //! [hough_lines]
+        //! [draw_lines]
+        // Draw the lines
+        for (int x = 0; x < lines.rows(); x++) {
+            double rho = lines.get(x, 0)[0],
+                    theta = lines.get(x, 0)[1];
+
+            double a = Math.cos(theta), b = Math.sin(theta);
+            double x0 = a*rho, y0 = b*rho;
+            Point pt1 = new Point(Math.round(x0 + 1000*(-b)), Math.round(y0 + 1000*(a)));
+            Point pt2 = new Point(Math.round(x0 - 1000*(-b)), Math.round(y0 - 1000*(a)));
+            Imgproc.line(cdst, pt1, pt2, new Scalar(0, 0, 255), 3, Imgproc.LINE_AA, 0);
+        }
+        //! [draw_lines]
+
+        //! [hough_lines_p]
+        // Probabilistic Line Transform
+        Mat linesP = new Mat(); // will hold the results of the detection
+        Imgproc.HoughLinesP(dst, linesP, 1, Math.PI/180, 50, 50, 10); // runs the actual detection
+        //! [hough_lines_p]
+        //! [draw_lines_p]
+        // Draw the lines
+        for (int x = 0; x < linesP.rows(); x++) {
+            double[] l = linesP.get(x, 0);
+            Imgproc.line(cdstP, new Point(l[0], l[1]), new Point(l[2], l[3]), new Scalar(0, 0, 255), 3, Imgproc.LINE_AA, 0);
+        }
+        //! [draw_lines_p]
+
+        //! [imshow]
+        // Show results
+        HighGui.imshow("Source", src);
+        HighGui.imshow("Detected Lines (in red) - Standard Hough Line Transform", cdst);
+        HighGui.imshow("Detected Lines (in red) - Probabilistic Line Transform", cdstP);
+        //! [imshow]
+
+        //! [exit]
+        // Wait and Exit
+        HighGui.waitKey();
+        System.exit(0);
+        //! [exit]
+    }
+}
+
+public class HoughLines {
+    public static void main(String[] args) {
+        // Load the native library.
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new HoughLinesRun().run(args);
+    }
+}
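
The standard-transform drawing loop converts each line from normal form (rho, theta) -- where (a, b) = (cos theta, sin theta) is the line's unit normal and (x0, y0) = (a*rho, b*rho) is its closest point to the origin -- into two distant points by stepping along the line direction (-b, a). A minimal sketch of that conversion:

    import math

    def endpoints(rho, theta, length=1000):
        a, b = math.cos(theta), math.sin(theta)   # unit normal of the line
        x0, y0 = a * rho, b * rho                 # foot of the perpendicular from the origin
        pt1 = (int(x0 + length * (-b)), int(y0 + length * a))
        pt2 = (int(x0 - length * (-b)), int(y0 - length * a))
        return pt1, pt2

    print(endpoints(100.0, math.pi / 4))          # 45-degree line at distance 100
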
diff --git a/samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java b/samples/java/tutorial_code/ImgTrans/LaPlace/LaplaceDemo.java
new file mode 100644 (file)
index 0000000..d9dba70
--- /dev/null
@@ -0,0 +1,73 @@
+/**
+ * @file LaplaceDemo.java
+ * @brief Sample code showing how to detect edges using the Laplace operator
+ */
+
+import org.opencv.core.*;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+
+class LaplaceDemoRun {
+
+    public void run(String[] args) {
+        //! [variables]
+        // Declare the variables we are going to use
+        Mat src, src_gray = new Mat(), dst = new Mat();
+        int kernel_size = 3;
+        int scale = 1;
+        int delta = 0;
+        int ddepth = CvType.CV_16S;
+        String window_name = "Laplace Demo";
+        //! [variables]
+
+        //! [load]
+        String imageName = ((args.length > 0) ? args[0] : "../data/lena.jpg");
+
+        src = Imgcodecs.imread(imageName, Imgcodecs.IMREAD_COLOR); // Load an image
+
+        // Check if image is loaded fine
+        if( src.empty() ) {
+            System.out.println("Error opening image");
+            System.out.println("Program Arguments: [image_name -- default ../data/lena.jpg] \n");
+            System.exit(-1);
+        }
+        //! [load]
+
+        //! [reduce_noise]
+        // Reduce noise by blurring with a Gaussian filter ( kernel size = 3 )
+        Imgproc.GaussianBlur( src, src, new Size(3, 3), 0, 0, Core.BORDER_DEFAULT );
+        //! [reduce_noise]
+
+        //! [convert_to_gray]
+        // Convert the image to grayscale
+        Imgproc.cvtColor( src, src_gray, Imgproc.COLOR_BGR2GRAY );
+        //! [convert_to_gray]
+
+        /// Apply Laplace function
+        Mat abs_dst = new Mat();
+        //! [laplacian]
+        Imgproc.Laplacian( src_gray, dst, ddepth, kernel_size, scale, delta, Core.BORDER_DEFAULT );
+        //! [laplacian]
+
+        //! [convert]
+        // converting back to CV_8U
+        Core.convertScaleAbs( dst, abs_dst );
+        //! [convert]
+
+        //! [display]
+        HighGui.imshow( window_name, abs_dst );
+        HighGui.waitKey(0);
+        //! [display]
+
+        System.exit(0);
+    }
+}
+
+public class LaplaceDemo {
+    public static void main(String[] args) {
+        // Load the native library.
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new LaplaceDemoRun().run(args);
+    }
+}
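
The sample writes the Laplacian into a signed 16-bit image (ddepth = CV_16S) because second derivatives are signed and would be clipped in an 8-bit destination; convertScaleAbs then maps absolute values back to displayable uint8. The same two steps in Python, path assumed as in the sample:

    import cv2 as cv

    gray = cv.cvtColor(cv.imread('../data/lena.jpg'), cv.COLOR_BGR2GRAY)  # assumed path
    lap16 = cv.Laplacian(gray, cv.CV_16S, ksize=3)  # signed: negative responses survive
    lap8 = cv.convertScaleAbs(lap16)                # |value|, saturated to uint8
    cv.imshow('laplacian', lap8)
    cv.waitKey(0)
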
diff --git a/samples/java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java b/samples/java/tutorial_code/ImgTrans/MakeBorder/CopyMakeBorder.java
new file mode 100644 (file)
index 0000000..8fffced
--- /dev/null
@@ -0,0 +1,94 @@
+/**
+ * @file CopyMakeBorder.java
+ * @brief Sample code that shows the functionality of copyMakeBorder
+ */
+
+import org.opencv.core.*;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgcodecs.Imgcodecs;
+
+import java.util.Random;
+
+class CopyMakeBorderRun {
+
+    public void run(String[] args) {
+
+        //! [variables]
+        // Declare the variables
+        Mat src, dst = new Mat();
+        int top, bottom, left, right;
+        int borderType = Core.BORDER_CONSTANT;
+        String window_name = "copyMakeBorder Demo";
+        Random rng;
+        //! [variables]
+
+        //! [load]
+        String imageName = ((args.length > 0) ? args[0] : "../data/lena.jpg");
+
+        // Load an image
+        src = Imgcodecs.imread(imageName, Imgcodecs.IMREAD_COLOR);
+
+        // Check if image is loaded fine
+        if( src.empty() ) {
+            System.out.println("Error opening image!");
+            System.out.println("Program Arguments: [image_name -- default ../data/lena.jpg] \n");
+            System.exit(-1);
+        }
+        //! [load]
+
+        // Brief how-to for this program
+        System.out.println("\n" +
+                "\t copyMakeBorder Demo: \n" +
+                "\t -------------------- \n" +
+                " ** Press 'c' to set the border to a random constant value \n" +
+                " ** Press 'r' to set the border to be replicated \n" +
+                " ** Press 'ESC' to exit the program \n");
+
+        //![create_window]
+        HighGui.namedWindow( window_name, HighGui.WINDOW_AUTOSIZE );
+        //![create_window]
+
+        //! [init_arguments]
+        // Initialize arguments for the filter
+        top = (int) (0.05*src.rows()); bottom = top;
+        left = (int) (0.05*src.cols()); right = left;
+        //! [init_arguments]
+
+        while( true ) {
+            //! [update_value]
+            rng = new Random();
+            Scalar value = new Scalar( rng.nextInt(256),
+                    rng.nextInt(256), rng.nextInt(256) );
+            //! [update_value]
+
+            //! [copymakeborder]
+            Core.copyMakeBorder( src, dst, top, bottom, left, right, borderType, value);
+            //! [copymakeborder]
+            //! [display]
+            HighGui.imshow( window_name, dst );
+            //! [display]
+
+            //![check_keypress]
+            char c = (char) HighGui.waitKey(500);
+            c = Character.toLowerCase(c);
+
+            if( c == 27 )
+            { break; }
+            else if( c == 'c' )
+            { borderType = Core.BORDER_CONSTANT;}
+            else if( c == 'r' )
+            { borderType = Core.BORDER_REPLICATE;}
+            //![check_keypress]
+        }
+
+        System.exit(0);
+    }
+}
+
+public class CopyMakeBorder {
+    public static void main(String[] args) {
+        // Load the native library.
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new CopyMakeBorderRun().run(args);
+    }
+}
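
The two keypresses above switch between BORDER_CONSTANT (pad with a fixed color) and BORDER_REPLICATE (extend the outermost rows and columns). A direct sketch of both modes in Python, path and fill color illustrative:

    import cv2 as cv

    src = cv.imread('../data/lena.jpg')               # assumed path, as in the sample
    top = bottom = int(0.05 * src.shape[0])
    left = right = int(0.05 * src.shape[1])
    const = cv.copyMakeBorder(src, top, bottom, left, right,
                              cv.BORDER_CONSTANT, value=(0, 255, 0))  # solid green frame
    repl = cv.copyMakeBorder(src, top, bottom, left, right,
                             cv.BORDER_REPLICATE)     # edge pixels smeared outward
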
diff --git a/samples/java/tutorial_code/ImgTrans/SobelDemo/SobelDemo.java b/samples/java/tutorial_code/ImgTrans/SobelDemo/SobelDemo.java
new file mode 100644 (file)
index 0000000..815c16b
--- /dev/null
@@ -0,0 +1,94 @@
+/**
+ * @file SobelDemo.java
+ * @brief Sample code using Sobel and/or Scharr OpenCV functions to make a simple Edge Detector
+ */
+
+import org.opencv.core.*;
+import org.opencv.highgui.HighGui;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+
+class SobelDemoRun {
+
+    public void run(String[] args) {
+
+        //! [declare_variables]
+        // First we declare the variables we are going to use
+        Mat src, src_gray = new Mat();
+        Mat grad = new Mat();
+        String window_name = "Sobel Demo - Simple Edge Detector";
+        int scale = 1;
+        int delta = 0;
+        int ddepth = CvType.CV_16S;
+        //! [declare_variables]
+
+        //! [load]
+        // As usual we load our source image (src)
+        // Check number of arguments
+        if (args.length == 0){
+            System.out.println("Not enough parameters!");
+            System.out.println("Program Arguments: [image_path]");
+            System.exit(-1);
+        }
+
+        // Load the image
+        src = Imgcodecs.imread(args[0]);
+
+        // Check if image is loaded fine
+        if( src.empty() ) {
+            System.out.println("Error opening image: " + args[0]);
+            System.exit(-1);
+        }
+        //! [load]
+
+        //! [reduce_noise]
+        // Remove noise by blurring with a Gaussian filter ( kernel size = 3 )
+        Imgproc.GaussianBlur( src, src, new Size(3, 3), 0, 0, Core.BORDER_DEFAULT );
+        //! [reduce_noise]
+
+        //! [convert_to_gray]
+        // Convert the image to grayscale
+        Imgproc.cvtColor( src, src_gray, Imgproc.COLOR_BGR2GRAY );
+        //! [convert_to_gray]
+
+        //! [sobel]
+        /// Generate grad_x and grad_y
+        Mat grad_x = new Mat(), grad_y = new Mat();
+        Mat abs_grad_x = new Mat(), abs_grad_y = new Mat();
+
+        /// Gradient X
+        //Imgproc.Scharr( src_gray, grad_x, ddepth, 1, 0, scale, delta, Core.BORDER_DEFAULT );
+        Imgproc.Sobel( src_gray, grad_x, ddepth, 1, 0, 3, scale, delta, Core.BORDER_DEFAULT );
+
+        /// Gradient Y
+        //Imgproc.Scharr( src_gray, grad_y, ddepth, 0, 1, scale, delta, Core.BORDER_DEFAULT );
+        Imgproc.Sobel( src_gray, grad_y, ddepth, 0, 1, 3, scale, delta, Core.BORDER_DEFAULT );
+        //! [sobel]
+
+        //![convert]
+        // converting back to CV_8U
+        Core.convertScaleAbs( grad_x, abs_grad_x );
+        Core.convertScaleAbs( grad_y, abs_grad_y );
+        //![convert]
+
+        //! [add_weighted]
+        /// Total Gradient (approximate)
+        Core.addWeighted( abs_grad_x, 0.5, abs_grad_y, 0.5, 0, grad );
+        //! [add_weighted]
+
+        //! [display]
+        HighGui.imshow( window_name, grad );
+        HighGui.waitKey(0);
+        //! [display]
+
+        System.exit(0);
+    }
+}
+
+public class SobelDemo {
+    public static void main(String[] args) {
+        // Load the native library.
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+        new SobelDemoRun().run(args);
+    }
+}
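
Blending |grad_x| and |grad_y| with addWeighted(0.5, 0.5) is a cheap approximation of the true gradient magnitude sqrt(gx^2 + gy^2). The exact version in Python for comparison, path assumed as in the other samples:

    import cv2 as cv
    import numpy as np

    gray = cv.cvtColor(cv.imread('../data/lena.jpg'), cv.COLOR_BGR2GRAY)  # assumed path
    gx = cv.Sobel(gray, cv.CV_32F, 1, 0, ksize=3)
    gy = cv.Sobel(gray, cv.CV_32F, 0, 1, ksize=3)
    approx = cv.addWeighted(np.abs(gx), 0.5, np.abs(gy), 0.5, 0)  # what the sample displays
    exact = cv.magnitude(gx, gy)                                  # sqrt(gx**2 + gy**2)
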
index e285c08..36d5972 100755 (executable)
@@ -64,7 +64,7 @@ PlanarTarget = namedtuple('PlaneTarget', 'image, rect, keypoints, descrs, data')
   p0     - matched points coords in target image
   p1     - matched points coords in input frame
   H      - homography matrix from p0 to p1
-  quad   - target bounary quad in input frame
+  quad   - target boundary quad in input frame
 '''
 TrackedTarget = namedtuple('TrackedTarget', 'target, p0, p1, H, quad')
 
diff --git a/samples/python/tutorial_code/ImgTrans/Filter2D/filter2D.py b/samples/python/tutorial_code/ImgTrans/Filter2D/filter2D.py
new file mode 100644 (file)
index 0000000..e4d1afe
--- /dev/null
@@ -0,0 +1,54 @@
+"""
+@file filter2D.py
+@brief Sample code that shows how to implement your own linear filters by using filter2D function
+"""
+import sys
+import cv2
+import numpy as np
+
+
+def main(argv):
+    window_name = 'filter2D Demo'
+
+    ## [load]
+    imageName = argv[0] if len(argv) > 0 else "../data/lena.jpg"
+
+    # Loads an image
+    src = cv2.imread(imageName, cv2.IMREAD_COLOR)
+
+    # Check if image is loaded fine
+    if src is None:
+        print ('Error opening image!')
+        print ('Usage: filter2D.py [image_name -- default ../data/lena.jpg] \n')
+        return -1
+    ## [load]
+    ## [init_arguments]
+    # Initialize ddepth argument for the filter
+    ddepth = -1
+    ## [init_arguments]
+    # Loop - Will filter the image with a different kernel size every 0.5 seconds
+    ind = 0
+    while True:
+        ## [update_kernel]
+        # Update kernel size for a normalized box filter
+        kernel_size = 3 + 2 * (ind % 5)
+        kernel = np.ones((kernel_size, kernel_size), dtype=np.float32)
+        kernel /= (kernel_size * kernel_size)
+        ## [update_kernel]
+        ## [apply_filter]
+        # Apply filter
+        dst = cv2.filter2D(src, ddepth, kernel)
+        ## [apply_filter]
+        cv2.imshow(window_name, dst)
+
+        c = cv2.waitKey(500)
+        if c == 27:
+            break
+
+        ind += 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py b/samples/python/tutorial_code/ImgTrans/HoughCircle/hough_circle.py
new file mode 100644 (file)
index 0000000..c8c472f
--- /dev/null
@@ -0,0 +1,59 @@
+import sys
+import cv2
+import numpy as np
+
+
+def main(argv):
+    ## [load]
+    default_file =  "../../../../data/smarties.png"
+    filename = argv[0] if len(argv) > 0 else default_file
+
+    # Loads an image
+    src = cv2.imread(filename, cv2.IMREAD_COLOR)
+
+    # Check if image is loaded fine
+    if src is None:
+        print ('Error opening image!')
+        print ('Usage: hough_circle.py [image_name -- default ' + default_file + '] \n')
+        return -1
+    ## [load]
+
+    ## [convert_to_gray]
+    # Convert it to gray
+    gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
+    ## [convert_to_gray]
+
+    ## [reduce_noise]
+    # Reduce the noise to avoid false circle detection
+    gray = cv2.medianBlur(gray, 5)
+    ## [reduce_noise]
+
+    ## [houghcircles]
+    rows = gray.shape[0]
+    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, rows / 8,
+                               param1=100, param2=30,
+                               minRadius=1, maxRadius=30)
+    ## [houghcircles]
+
+    ## [draw]
+    if circles is not None:
+        circles = np.uint16(np.around(circles))
+        for i in circles[0, :]:
+            center = (i[0], i[1])
+            # circle center
+            cv2.circle(src, center, 1, (0, 100, 100), 3)
+            # circle outline
+            radius = i[2]
+            cv2.circle(src, center, radius, (255, 0, 255), 3)
+    ## [draw]
+
+    ## [display]
+    cv2.imshow("detected circles", src)
+    cv2.waitKey(0)
+    ## [display]
+
+    return 0
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py b/samples/python/tutorial_code/ImgTrans/HoughLine/hough_lines.py
new file mode 100644 (file)
index 0000000..2907996
--- /dev/null
@@ -0,0 +1,79 @@
+"""
+@file hough_lines.py
+@brief This program demonstrates line finding with the Hough transform
+"""
+import sys
+import math
+import cv2
+import numpy as np
+
+
+def main(argv):
+    ## [load]
+    default_file =  "../../../../data/sudoku.png"
+    filename = argv[0] if len(argv) > 0 else default_file
+
+    # Loads an image
+    src = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
+
+    # Check if image is loaded fine
+    if src is None:
+        print ('Error opening image!')
+        print ('Usage: hough_lines.py [image_name -- default ' + default_file + '] \n')
+        return -1
+    ## [load]
+
+    ## [edge_detection]
+    # Edge detection
+    dst = cv2.Canny(src, 50, 200, None, 3)
+    ## [edge_detection]
+
+    # Copy edges to the images that will display the results in BGR
+    cdst = cv2.cvtColor(dst, cv2.COLOR_GRAY2BGR)
+    cdstP = np.copy(cdst)
+
+    ## [hough_lines]
+    #  Standard Hough Line Transform
+    lines = cv2.HoughLines(dst, 1, np.pi / 180, 150, None, 0, 0)
+    ## [hough_lines]
+    ## [draw_lines]
+    # Draw the lines
+    if lines is not None:
+        for i in range(0, len(lines)):
+            rho = lines[i][0][0]
+            theta = lines[i][0][1]
+            a = math.cos(theta)
+            b = math.sin(theta)
+            x0 = a * rho
+            y0 = b * rho
+            pt1 = (int(x0 + 1000*(-b)), int(y0 + 1000*(a)))
+            pt2 = (int(x0 - 1000*(-b)), int(y0 - 1000*(a)))
+
+            cv2.line(cdst, pt1, pt2, (0,0,255), 3, cv2.LINE_AA)
+    ## [draw_lines]
+
+    ## [hough_lines_p]
+    # Probabilistic Line Transform
+    linesP = cv2.HoughLinesP(dst, 1, np.pi / 180, 50, None, 50, 10)
+    ## [hough_lines_p]
+    ## [draw_lines_p]
+    # Draw the lines
+    if linesP is not None:
+        for i in range(0, len(linesP)):
+            l = linesP[i][0]
+            cv2.line(cdstP, (l[0], l[1]), (l[2], l[3]), (0,0,255), 3, cv2.LINE_AA)
+    ## [draw_lines_p]
+    ## [imshow]
+    # Show results
+    cv2.imshow("Source", src)
+    cv2.imshow("Detected Lines (in red) - Standard Hough Line Transform", cdst)
+    cv2.imshow("Detected Lines (in red) - Probabilistic Line Transform", cdstP)
+    ## [imshow]
+    ## [exit]
+    # Wait and Exit
+    cv2.waitKey()
+    return 0
+    ## [exit]
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py b/samples/python/tutorial_code/ImgTrans/LaPlace/laplace_demo.py
new file mode 100644 (file)
index 0000000..5776e44
--- /dev/null
@@ -0,0 +1,59 @@
+"""
+@file laplace_demo.py
+@brief Sample code showing how to detect edges using the Laplace operator
+"""
+import sys
+import cv2
+
+def main(argv):
+    # [variables]
+    # Declare the variables we are going to use
+    ddepth = cv2.CV_16S
+    kernel_size = 3
+    window_name = "Laplace Demo"
+    # [variables]
+
+    # [load]
+    imageName = argv[0] if len(argv) > 0 else "../data/lena.jpg"
+
+    src = cv2.imread(imageName, cv2.IMREAD_COLOR) # Load an image
+
+    # Check if image is loaded fine
+    if src is None:
+        print ('Error opening image')
+        print ('Program Arguments: [image_name -- default ../data/lena.jpg]')
+        return -1
+    # [load]
+
+    # [reduce_noise]
+    # Remove noise by blurring with a Gaussian filter
+    src = cv2.GaussianBlur(src, (3, 3), 0)
+    # [reduce_noise]
+
+    # [convert_to_gray]
+    # Convert the image to grayscale
+    src_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
+    # [convert_to_gray]
+
+    # Create Window
+    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
+
+    # [laplacian]
+    # Apply Laplace function
+    dst = cv2.Laplacian(src_gray, ddepth, kernel_size)
+    # [laplacian]
+
+    # [convert]
+    # converting back to uint8
+    abs_dst = cv2.convertScaleAbs(dst)
+    # [convert]
+
+    # [display]
+    cv2.imshow(window_name, abs_dst)
+    cv2.waitKey(0)
+    # [display]
+
+    return 0
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/samples/python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py b/samples/python/tutorial_code/ImgTrans/MakeBorder/copy_make_border.py
new file mode 100644 (file)
index 0000000..36b4e13
--- /dev/null
@@ -0,0 +1,69 @@
+"""
+@file copy_make_border.py
+@brief Sample code that shows the functionality of copyMakeBorder
+"""
+import sys
+from random import randint
+import cv2
+
+
+def main(argv):
+    ## [variables]
+    # First we declare the variables we are going to use
+    borderType = cv2.BORDER_CONSTANT
+    window_name = "copyMakeBorder Demo"
+    ## [variables]
+    ## [load]
+    imageName = argv[0] if len(argv) > 0 else "../data/lena.jpg"
+
+    # Loads an image
+    src = cv2.imread(imageName, cv2.IMREAD_COLOR)
+
+    # Check if image is loaded fine
+    if src is None:
+        print ('Error opening image!')
+        print ('Usage: copy_make_border.py [image_name -- default ../data/lena.jpg] \n')
+        return -1
+    ## [load]
+    # Brief how-to for this program
+    print ('\n'
+           '\t          copyMakeBorder Demo: \n'
+           '    -------------------- \n'
+           ' ** Press \'c\' to set the border to a random constant value \n'
+           ' ** Press \'r\' to set the border to be replicated \n'
+           ' ** Press \'ESC\' to exit the program ')
+    ## [create_window]
+    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
+    ## [create_window]
+    ## [init_arguments]
+    # Initialize arguments for the filter
+    top = int(0.05 * src.shape[0])  # shape[0] = rows
+    bottom = top
+    left = int(0.05 * src.shape[1])  # shape[1] = cols
+    right = left
+    ## [init_arguments]
+    while 1:
+        ## [update_value]
+        value = [randint(0, 255), randint(0, 255), randint(0, 255)]
+        ## [update_value]
+        ## [copymakeborder]
+        dst = cv2.copyMakeBorder(src, top, bottom, left, right, borderType, None, value)
+        ## [copymakeborder]
+        ## [display]
+        cv2.imshow(window_name, dst)
+        ## [display]
+        ## [check_keypress]
+        c = cv2.waitKey(500)
+
+        if c == 27:
+            break
+        elif c == 99: # 99 = ord('c')
+            borderType = cv2.BORDER_CONSTANT
+        elif c == 114: # 114 = ord('r')
+            borderType = cv2.BORDER_REPLICATE
+        ## [check_keypress]
+    return 0
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/samples/python/tutorial_code/ImgTrans/SobelDemo/sobel_demo.py b/samples/python/tutorial_code/ImgTrans/SobelDemo/sobel_demo.py
new file mode 100644 (file)
index 0000000..4afe3af
--- /dev/null
@@ -0,0 +1,74 @@
+"""
+@file sobel_demo.py
+@brief Sample code using Sobel and/or Scharr OpenCV functions to make a simple Edge Detector
+"""
+import sys
+import cv2
+
+
+def main(argv):
+    ## [variables]
+    # First we declare the variables we are going to use
+    window_name = 'Sobel Demo - Simple Edge Detector'
+    scale = 1
+    delta = 0
+    ddepth = cv2.CV_16S
+    ## [variables]
+
+    ## [load]
+    # As usual we load our source image (src)
+    # Check number of arguments
+    if len(argv) < 1:
+        print ('Not enough parameters')
+        print ('Usage:\nsobel_demo.py < path_to_image >')
+        return -1
+
+    # Load the image
+    src = cv2.imread(argv[0], cv2.IMREAD_COLOR)
+
+    # Check if image is loaded fine
+    if src is None:
+        print ('Error opening image: ' + argv[0])
+        return -1
+    ## [load]
+
+    ## [reduce_noise]
+    # Remove noise by blurring with a Gaussian filter ( kernel size = 3 )
+    src = cv2.GaussianBlur(src, (3, 3), 0)
+    ## [reduce_noise]
+
+    ## [convert_to_gray]
+    # Convert the image to grayscale
+    gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
+    ## [convert_to_gray]
+
+    ## [sobel]
+    # Gradient-X
+    # grad_x = cv2.Scharr(gray, ddepth, 1, 0)
+    grad_x = cv2.Sobel(gray, ddepth, 1, 0, ksize=3, scale=scale, delta=delta, borderType=cv2.BORDER_DEFAULT)
+
+    # Gradient-Y
+    # grad_y = cv2.Scharr(gray, ddepth, 0, 1)
+    grad_y = cv2.Sobel(gray, ddepth, 0, 1, ksize=3, scale=scale, delta=delta, borderType=cv2.BORDER_DEFAULT)
+    ## [sobel]
+
+    ## [convert]
+    # converting back to uint8
+    abs_grad_x = cv2.convertScaleAbs(grad_x)
+    abs_grad_y = cv2.convertScaleAbs(grad_y)
+    ## [convert]
+
+    ## [blend]
+    ## Total Gradient (approximate)
+    grad = cv2.addWeighted(abs_grad_x, 0.5, abs_grad_y, 0.5, 0)
+    ## [blend]
+
+    ## [display]
+    cv2.imshow(window_name, grad)
+    cv2.waitKey(0)
+    ## [display]
+
+    return 0
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
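The commented-out cv2.Scharr lines above are a drop-in alternative to the 3x3 Sobel kernels. A brief sketch of that variant (not part of the patch; the random test image is an assumption, so it runs standalone):

    import cv2
    import numpy as np

    gray = np.random.randint(0, 256, (64, 64), dtype=np.uint8)

    # Scharr is a fixed 3x3 gradient operator with better rotational symmetry
    grad_x = cv2.Scharr(gray, cv2.CV_16S, 1, 0)
    grad_y = cv2.Scharr(gray, cv2.CV_16S, 0, 1)

    # Same post-processing as the sample: back to uint8, then a 50/50 blend
    grad = cv2.addWeighted(cv2.convertScaleAbs(grad_x), 0.5,
                           cv2.convertScaleAbs(grad_y), 0.5, 0)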
diff --git a/samples/python/tutorial_code/imgProc/HitMiss/hit_miss.py b/samples/python/tutorial_code/imgProc/HitMiss/hit_miss.py
new file mode 100644 (file)
index 0000000..c25715c
--- /dev/null
@@ -0,0 +1,38 @@
+import cv2
+import numpy as np
+
+input_image = np.array((
+    [0, 0, 0, 0, 0, 0, 0, 0],
+    [0, 255, 255, 255, 0, 0, 0, 255],
+    [0, 255, 255, 255, 0, 0, 0, 0],
+    [0, 255, 255, 255, 0, 255, 0, 0],
+    [0, 0, 255, 0, 0, 0, 0, 0],
+    [0, 0, 255, 0, 0, 255, 255, 0],
+    [0, 255, 0, 255, 0, 0, 255, 0],
+    [0, 255, 255, 255, 0, 0, 0, 0]), dtype="uint8")
+
+kernel = np.array((
+    [0, 1, 0],
+    [1, -1, 1],
+    [0, 1, 0]), dtype="int")
+
+output_image = cv2.morphologyEx(input_image, cv2.MORPH_HITMISS, kernel)
+
+rate = 50
+kernel = (kernel + 1) * 127
+kernel = np.uint8(kernel)
+
+kernel = cv2.resize(kernel, None, fx=rate, fy=rate, interpolation=cv2.INTER_NEAREST)
+cv2.imshow("kernel", kernel)
+cv2.moveWindow("kernel", 0, 0)
+
+input_image = cv2.resize(input_image, None, fx=rate, fy=rate, interpolation=cv2.INTER_NEAREST)
+cv2.imshow("Original", input_image)
+cv2.moveWindow("Original", 0, 200)
+
+output_image = cv2.resize(output_image, None, fx=rate, fy=rate, interpolation=cv2.INTER_NEAREST)
+cv2.imshow("Hit or Miss", output_image)
+cv2.moveWindow("Hit or Miss", 500, 200)
+
+cv2.waitKey(0)
+cv2.destroyAllWindows()
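In a MORPH_HITMISS kernel, 1 requires a foreground pixel, -1 requires background, and 0 is "don't care", so the cross-shaped kernel above fires only on background pixels whose four neighbours are all foreground. A tiny self-checking sketch under those semantics (not part of the patch):

    import cv2
    import numpy as np

    img = np.zeros((5, 5), dtype="uint8")
    img[1:4, 1:4] = 255  # 3x3 foreground block
    img[2, 2] = 0        # punch a background hole in its centre

    kernel = np.array((
        [0, 1, 0],
        [1, -1, 1],
        [0, 1, 0]), dtype="int")

    out = cv2.morphologyEx(img, cv2.MORPH_HITMISS, kernel)
    print(np.argwhere(out == 255))  # expected: only [[2 2]], the hole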
diff --git a/samples/python/tutorial_code/imgProc/Pyramids/pyramids.py b/samples/python/tutorial_code/imgProc/Pyramids/pyramids.py
new file mode 100644 (file)
index 0000000..127345a
--- /dev/null
@@ -0,0 +1,51 @@
+import sys
+import cv2
+
+
+def main(argv):
+    print("""
+    Zoom In-Out demo
+    ------------------
+    * [i] -> Zoom [i]n
+    * [o] -> Zoom [o]ut
+    * [ESC] -> Close program
+    """)
+    ## [load]
+    filename = argv[0] if len(argv) > 0 else "../data/chicky_512.png"
+
+    # Load the image
+    src = cv2.imread(filename)
+
+    # Check if image is loaded fine
+    if src is None:
+        print('Error opening image!')
+        print('Usage: pyramids.py [image_name -- default ../data/chicky_512.png] \n')
+        return -1
+    ## [load]
+    ## [loop]
+    while True:
+        rows, cols, _channels = map(int, src.shape)
+        ## [show_image]
+        cv2.imshow('Pyramids Demo', src)
+        ## [show_image]
+        k = cv2.waitKey(0)
+
+        if k == 27:
+            break
+            ## [pyrup]
+        elif k == ord('i'):
+            src = cv2.pyrUp(src, dstsize=(2 * cols, 2 * rows))
+            print('** Zoom In: Image x 2')
+            ## [pyrup]
+            ## [pyrdown]
+        elif k == ord('o'):
+            src = cv2.pyrDown(src, dstsize=(cols // 2, rows // 2))
+            print('** Zoom Out: Image / 2')
+            ## [pyrdown]
+    ## [loop]
+
+    cv2.destroyAllWindows()
+    return 0
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
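A non-interactive sketch of the two pyramid calls used above (not part of the patch; the synthetic image is an assumption, so nothing is loaded from disk):

    import cv2
    import numpy as np

    src = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)

    up = cv2.pyrUp(src)      # Gaussian upsample: 64x64 -> 128x128
    down = cv2.pyrDown(src)  # blur, then drop every other row/col: 64x64 -> 32x32
    print(up.shape, down.shape)  # (128, 128, 3) (32, 32, 3)

    # pyrDown(pyrUp(x)) is close to, but not exactly, x: the Gaussian
    # blur in both steps discards high-frequency detail
    roundtrip = cv2.pyrDown(up)
    print(np.abs(roundtrip.astype(int) - src.astype(int)).mean())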
diff --git a/samples/python/tutorial_code/imgProc/Smoothing/smoothing.py b/samples/python/tutorial_code/imgProc/Smoothing/smoothing.py
new file mode 100644 (file)
index 0000000..c20ca87
--- /dev/null
@@ -0,0 +1,107 @@
+import sys
+import cv2
+import numpy as np
+
+#  Global Variables
+
+DELAY_CAPTION = 1500
+DELAY_BLUR = 100
+MAX_KERNEL_LENGTH = 31
+
+src = None
+dst = None
+window_name = 'Smoothing Demo'
+
+
+def main(argv):
+    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
+
+    # Load the source image
+    imageName = argv[0] if len(argv) > 0 else "../data/lena.jpg"
+
+    global src
+    src = cv2.imread(imageName, cv2.IMREAD_COLOR)
+    if src is None:
+        print('Error opening image')
+        print('Usage: smoothing.py [image_name -- default ../data/lena.jpg] \n')
+        return -1
+
+    if display_caption('Original Image') != 0:
+        return 0
+
+    global dst
+    dst = np.copy(src)
+    if display_dst(DELAY_CAPTION) != 0:
+        return 0
+
+    # Applying Homogeneous blur
+    if display_caption('Homogeneous Blur') != 0:
+        return 0
+
+    ## [blur]
+    for i in range(1, MAX_KERNEL_LENGTH, 2):
+        dst = cv2.blur(src, (i, i))
+        if display_dst(DELAY_BLUR) != 0:
+            return 0
+    ## [blur]
+
+    # Applying Gaussian blur
+    if display_caption('Gaussian Blur') != 0:
+        return 0
+
+    ## [gaussianblur]
+    for i in range(1, MAX_KERNEL_LENGTH, 2):
+        dst = cv2.GaussianBlur(src, (i, i), 0)
+        if display_dst(DELAY_BLUR) != 0:
+            return 0
+    ## [gaussianblur]
+
+    # Applying Median blur
+    if display_caption('Median Blur') != 0:
+        return 0
+
+    ## [medianblur]
+    for i in range(1, MAX_KERNEL_LENGTH, 2):
+        dst = cv2.medianBlur(src, i)
+        if display_dst(DELAY_BLUR) != 0:
+            return 0
+    ## [medianblur]
+
+    # Applying Bilateral Filter
+    if display_caption('Bilateral Blur') != 0:
+        return 0
+
+    ## [bilateralfilter]
+    # Remember, the bilateral filter is comparatively slow, so larger kernel sizes take noticeably longer
+    for i in range(1, MAX_KERNEL_LENGTH, 2):
+        dst = cv2.bilateralFilter(src, i, i * 2, i / 2)
+        if display_dst(DELAY_BLUR) != 0:
+            return 0
+    ## [bilateralfilter]
+
+    #  Done
+    display_caption('Done!')
+
+    return 0
+
+
+def display_caption(caption):
+    global dst
+    dst = np.zeros(src.shape, src.dtype)
+    rows, cols, _ch = src.shape
+    cv2.putText(dst, caption,
+                (int(cols / 4), int(rows / 2)),
+                cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255))
+
+    return display_dst(DELAY_CAPTION)
+
+
+def display_dst(delay):
+    cv2.imshow(window_name, dst)
+    c = cv2.waitKey(delay)
+    if c >= 0: return -1
+    return 0
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
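For orientation, the three numeric arguments the loop passes to bilateralFilter are the neighbourhood diameter, sigmaColor and sigmaSpace. A one-shot sketch with fixed values (not part of the patch; the synthetic input is an assumption):

    import cv2
    import numpy as np

    src = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)

    # d=9: pixel neighbourhood diameter; sigmaColor=75: how dissimilar colours
    # may be and still mix; sigmaSpace=75: how distant pixels may be and still mix
    dst = cv2.bilateralFilter(src, 9, 75, 75)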
diff --git a/samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py b/samples/python/tutorial_code/imgProc/morph_lines_detection/morph_lines_detection.py
new file mode 100644 (file)
index 0000000..24965e5
--- /dev/null
@@ -0,0 +1,136 @@
+"""
+@file morph_lines_detection.py
+@brief Sample code that uses morphology transformations to extract horizontal and vertical lines
+"""
+import numpy as np
+import sys
+import cv2
+
+
+def show_wait_destroy(winname, img):
+    cv2.imshow(winname, img)
+    cv2.moveWindow(winname, 500, 0)
+    cv2.waitKey(0)
+    cv2.destroyWindow(winname)
+
+
+def main(argv):
+    # [load_image]
+    # Check number of arguments
+    if len(argv) < 1:
+        print('Not enough parameters')
+        print('Usage:\nmorph_lines_detection.py < path_to_image >')
+        return -1
+
+    # Load the image
+    src = cv2.imread(argv[0], cv2.IMREAD_COLOR)
+
+    # Check if image is loaded fine
+    if src is None:
+        print('Error opening image: ' + argv[0])
+        return -1
+
+    # Show source image
+    cv2.imshow("src", src)
+    # [load_image]
+
+    # [gray]
+    # Transform source image to gray if it is not already
+    if len(src.shape) != 2:
+        gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
+    else:
+        gray = src
+
+    # Show gray image
+    show_wait_destroy("gray", gray)
+    # [gray]
+
+    # [bin]
+    # Apply adaptiveThreshold to the bitwise_not of gray (the Python counterpart of the C++ ~ operator)
+    gray = cv2.bitwise_not(gray)
+    bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
+                               cv2.THRESH_BINARY, 15, -2)
+    # Show binary image
+    show_wait_destroy("binary", bw)
+    # [bin]
+
+    # [init]
+    # Create the images that we will use to extract the horizontal and vertical lines
+    horizontal = np.copy(bw)
+    vertical = np.copy(bw)
+    # [init]
+
+    # [horiz]
+    # Specify size on horizontal axis
+    cols = horizontal.shape[1]
+    horizontal_size = cols // 30
+
+    # Create structure element for extracting horizontal lines through morphology operations
+    horizontalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_size, 1))
+
+    # Apply morphology operations
+    horizontal = cv2.erode(horizontal, horizontalStructure)
+    horizontal = cv2.dilate(horizontal, horizontalStructure)
+
+    # Show extracted horizontal lines
+    show_wait_destroy("horizontal", horizontal)
+    # [horiz]
+
+    # [vert]
+    # Specify size on vertical axis
+    rows = vertical.shape[0]
+    verticalsize = rows // 30
+
+    # Create structure element for extracting vertical lines through morphology operations
+    verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, verticalsize))
+
+    # Apply morphology operations
+    vertical = cv2.erode(vertical, verticalStructure)
+    vertical = cv2.dilate(vertical, verticalStructure)
+
+    # Show extracted vertical lines
+    show_wait_destroy("vertical", vertical)
+    # [vert]
+
+    # [smooth]
+    # Invert the vertical image
+    vertical = cv2.bitwise_not(vertical)
+    show_wait_destroy("vertical_bit", vertical)
+
+    '''
+    Extract edges and smooth image according to the logic
+    1. extract edges
+    2. dilate(edges)
+    3. src.copyTo(smooth)
+    4. blur smooth img
+    5. smooth.copyTo(src, edges)
+    '''
+
+    # Step 1
+    edges = cv2.adaptiveThreshold(vertical, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
+                                  cv2.THRESH_BINARY, 3, -2)
+    show_wait_destroy("edges", edges)
+
+    # Step 2
+    kernel = np.ones((2, 2), np.uint8)
+    edges = cv2.dilate(edges, kernel)
+    show_wait_destroy("dilate", edges)
+
+    # Step 3
+    smooth = np.copy(vertical)
+
+    # Step 4
+    smooth = cv2.blur(smooth, (2, 2))
+
+    # Step 5
+    (rows, cols) = np.where(edges != 0)
+    vertical[rows, cols] = smooth[rows, cols]
+
+    # Show final result
+    show_wait_destroy("smooth - final", vertical)
+    # [smooth]
+
+    return 0
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
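The core trick of this sample is that an erosion followed by a dilation with a long, thin structuring element keeps only shapes able to contain that element. A reduced sketch of just that step (not part of the patch; the synthetic binary image is an assumption):

    import cv2
    import numpy as np

    bw = np.zeros((40, 40), dtype=np.uint8)
    bw[20, 5:35] = 255  # a horizontal line
    bw[5:35, 10] = 255  # a vertical line

    # A 15x1 horizontal element: the vertical line cannot contain it, so
    # erosion erases it; dilation then restores the horizontal line's extent
    horiz = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
    only_horizontal = cv2.dilate(cv2.erode(bw, horiz), horiz)

    print(np.count_nonzero(only_horizontal[20, :]))  # 30: horizontal line kept
    print(np.count_nonzero(only_horizontal[:, 10]))  # 1: only the crossing pixel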