--- /dev/null
+/*******************************************************************************
+ * Copyright (c) 2008-2020 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+/*****************************************************************************\
+
+Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
+
+THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
+MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+File Name: cl_va_api_media_sharing_intel.h
+
+Abstract:
+
+Notes:
+
+\*****************************************************************************/
+
+
+#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
+#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
+
+#include <CL/cl.h>
+#include <CL/cl_platform.h>
+#include <va/va.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************
+* cl_intel_va_api_media_sharing extension *
+*******************************************/
+
+#define cl_intel_va_api_media_sharing 1
+
+/* error codes */
+#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
+#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
+#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
+#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
+
+/* cl_va_api_device_source_intel */
+#define CL_VA_API_DISPLAY_INTEL 0x4094
+
+/* cl_va_api_device_set_intel */
+#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
+#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
+
+/* cl_context_info */
+#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
+
+/* cl_mem_info */
+#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
+
+/* cl_image_info */
+#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
+
+/* cl_command_type */
+#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
+#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
+
+typedef cl_uint cl_va_api_device_source_intel;
+typedef cl_uint cl_va_api_device_set_intel;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
+ cl_platform_id platform,
+ cl_va_api_device_source_intel media_adapter_type,
+ void* media_adapter,
+ cl_va_api_device_set_intel media_adapter_set,
+ cl_uint num_entries,
+ cl_device_id* devices,
+ cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
+ cl_platform_id platform,
+ cl_va_api_device_source_intel media_adapter_type,
+ void* media_adapter,
+ cl_va_api_device_set_intel media_adapter_set,
+ cl_uint num_entries,
+ cl_device_id* devices,
+ cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromVA_APIMediaSurfaceINTEL(
+ cl_context context,
+ cl_mem_flags flags,
+ VASurfaceID* surface,
+ cl_uint plane,
+ cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
+ cl_context context,
+ cl_mem_flags flags,
+ VASurfaceID* surface,
+ cl_uint plane,
+ cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireVA_APIMediaSurfacesINTEL(
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseVA_APIMediaSurfacesINTEL(
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
+
# Author: qtang@openailab.com or https://github.com/BUG1989
# qli@openailab.com
# sqfu@openailab.com
-#
-SET(TENGINE_COMMIT_VERSION "8a4c58e0e05cd850f4bb0936a330edc86dc0e28c")
+SET(TENGINE_COMMIT_VERSION "e89cf8870de2ff0a80cfe626c0b52b2a16fb302e")
SET(OCV_TENGINE_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtengine")
SET(OCV_TENGINE_SOURCE_PATH "${OCV_TENGINE_DIR}/Tengine-${TENGINE_COMMIT_VERSION}")
SET(Tengine_FOUND ON)
SET(BUILD_TENGINE ON)
ELSE()
- SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name2
- SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url2
- SET(tengine_md5sum f51ca8f3963faeeff3f019a6f6edc206) #md5sum2
+ SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name
+ SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url
+ SET(tengine_md5sum 23f61ebb1dd419f1207d8876496289c5) #md5sum
- #MESSAGE(STATUS "**** TENGINE DOWNLOAD BEGIN ****")
ocv_download(FILENAME ${OCV_TENGINE_FILENAME}
HASH ${tengine_md5sum}
URL
if(BUILD_TENGINE)
SET(HAVE_TENGINE 1)
- # android system
- if(ANDROID)
- if(${ANDROID_ABI} STREQUAL "armeabi-v7a")
- SET(CONFIG_ARCH_ARM32 ON)
- elseif(${ANDROID_ABI} STREQUAL "arm64-v8a")
- SET(CONFIG_ARCH_ARM64 ON)
- endif()
- else()
+ if(NOT ANDROID)
# linux system
if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm)
- SET(CONFIG_ARCH_ARM32 ON)
+ SET(TENGINE_TOOLCHAIN_FLAG "-march=armv7-a")
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64
- SET(CONFIG_ARCH_ARM64 ON)
+ SET(TENGINE_TOOLCHAIN_FLAG "-march=armv8-a")
endif()
endif()
SET(BUILT_IN_OPENCV ON) ## set for tengine compile discern .
- SET(Tengine_INCLUDE_DIR "${OCV_TENGINE_SOURCE_PATH}/core/include" CACHE INTERNAL "")
+ SET(Tengine_INCLUDE_DIR "${OCV_TENGINE_SOURCE_PATH}/include" CACHE INTERNAL "")
if(EXISTS "${OCV_TENGINE_SOURCE_PATH}/CMakeLists.txt")
add_subdirectory("${OCV_TENGINE_SOURCE_PATH}" "${OCV_TENGINE_DIR}/build")
else()
status(" VA:" HAVE_VA THEN "YES" ELSE NO)
endif()
-if(WITH_VA_INTEL OR HAVE_VA_INTEL)
- status(" Intel VA-API/OpenCL:" HAVE_VA_INTEL THEN "YES (OpenCL: ${VA_INTEL_IOCL_ROOT})" ELSE NO)
-endif()
-
if(WITH_TENGINE OR HAVE_TENGINE)
status(" Tengine:" HAVE_TENGINE THEN "YES (${TENGINE_LIBRARIES})" ELSE NO)
endif()
IF HAVE_CLAMDFFT THEN "AMDFFT"
IF HAVE_CLAMDBLAS THEN "AMDBLAS"
IF HAVE_OPENCL_D3D11_NV THEN "NVD3D11"
+ IF HAVE_VA_INTEL THEN "INTELVA"
ELSE "no extra features")
status("")
status(" OpenCL:" HAVE_OPENCL THEN "YES (${opencl_features})" ELSE "NO")
# check WITH_OPENCL_D3D11_NV is located in OpenCVDetectDirectX.cmake file
+ if(WITH_VA_INTEL AND HAVE_VA)
+ if(HAVE_OPENCL AND EXISTS "${OPENCL_INCLUDE_DIR}/CL/cl_va_api_media_sharing_intel.h")
+ set(HAVE_VA_INTEL ON)
+ elseif(HAVE_OPENCL AND EXISTS "${OPENCL_INCLUDE_DIR}/CL/va_ext.h")
+ set(HAVE_VA_INTEL ON)
+ set(HAVE_VA_INTEL_OLD_HEADER ON)
+ endif()
+ endif()
+
endif()
list(APPEND HIGHGUI_LIBRARIES comctl32 gdi32 ole32 setupapi ws2_32)
endif(WIN32)
-# --- VA & VA_INTEL ---
-if(WITH_VA_INTEL)
- include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindVA_INTEL.cmake")
- if(VA_INTEL_IOCL_INCLUDE_DIR)
- ocv_include_directories(${VA_INTEL_IOCL_INCLUDE_DIR})
- endif()
- set(WITH_VA YES)
-endif(WITH_VA_INTEL)
-
if(WITH_VA)
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindVA.cmake")
if(VA_INCLUDE_DIR)
-# Main variables:
-# HAVE_VA for conditional compilation OpenCV with/without libva
+# Output:
+# HAVE_VA - libva is available
+# HAVE_VA_INTEL - OpenCL/libva Intel interoperability extension is available
if(UNIX AND NOT ANDROID)
find_path(
+++ /dev/null
-# Main variables:
-# VA_INTEL_IOCL_INCLUDE_DIR to use VA_INTEL
-# HAVE_VA_INTEL for conditional compilation OpenCV with/without VA_INTEL
-
-# VA_INTEL_IOCL_ROOT - root of Intel OCL installation
-
-if(UNIX AND NOT ANDROID)
- ocv_check_environment_variables(VA_INTEL_IOCL_ROOT)
- if(NOT DEFINED VA_INTEL_IOCL_ROOT)
- set(VA_INTEL_IOCL_ROOT "/opt/intel/opencl")
- endif()
-
- find_path(
- VA_INTEL_IOCL_INCLUDE_DIR
- NAMES CL/va_ext.h
- PATHS ${VA_INTEL_IOCL_ROOT}
- PATH_SUFFIXES include
- DOC "Path to Intel OpenCL headers")
-endif()
-
-if(VA_INTEL_IOCL_INCLUDE_DIR)
- set(HAVE_VA_INTEL TRUE)
- if(NOT DEFINED VA_INTEL_LIBRARIES)
- set(VA_INTEL_LIBRARIES "va" "va-drm")
- endif()
-else()
- set(HAVE_VA_INTEL FALSE)
- message(WARNING "Intel OpenCL installation is not found.")
-endif()
-
-mark_as_advanced(FORCE VA_INTEL_IOCL_INCLUDE_DIR)
if(HAVE_MEMALIGN)
ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_MEMALIGN=1")
endif()
+if(HAVE_VA_INTEL_OLD_HEADER)
+ ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp" "HAVE_VA_INTEL_OLD_HEADER")
+endif()
option(OPENCV_ENABLE_ALLOCATOR_STATS "Enable Allocator metrics" ON)
/** @addtogroup core_va_intel
This section describes Intel VA-API/OpenCL (CL-VA) interoperability.
-To enable CL-VA interoperability support, configure OpenCV using CMake with WITH_VA_INTEL=ON . Currently VA-API is
-supported on Linux only. You should also install Intel Media Server Studio (MSS) to use this feature. You may
-have to specify the path(s) to MSS components for cmake in environment variables:
+To enable basic VA interoperability build OpenCV with libva library integration enabled: `-DWITH_VA=ON` (corresponding dev package should be installed).
-- VA_INTEL_IOCL_ROOT for Intel OpenCL (default is "/opt/intel/opencl").
+To enable advanced CL-VA interoperability support on Intel HW, enable option: `-DWITH_VA_INTEL=ON` (OpenCL integration should be enabled which is the default setting). Special runtime environment should be set up in order to use this feature: correct combination of [libva](https://github.com/intel/libva), [OpenCL runtime](https://github.com/intel/compute-runtime) and [media driver](https://github.com/intel/media-driver) should be installed.
-To use CL-VA interoperability you should first create VADisplay (libva), and then call initializeContextFromVA()
-function to create OpenCL context and set up interoperability.
+Check usage example for details: samples/va_intel/va_intel_interop.cpp
*/
//! @{
}
cl_platform_id platform = platforms[found];
- std::string platformName = PlatformInfo(platform).name();
+ std::string platformName = PlatformInfo(&platform).name();
OpenCLExecutionContext clExecCtx;
try
}
cl_platform_id platform = platforms[found];
- std::string platformName = PlatformInfo(platform).name();
+ std::string platformName = PlatformInfo(&platform).name();
OpenCLExecutionContext clExecCtx;
try
}
cl_platform_id platform = platforms[found];
- std::string platformName = PlatformInfo(platform).name();
+ std::string platformName = PlatformInfo(&platform).name();
OpenCLExecutionContext clExecCtx;
try
}
cl_platform_id platform = platforms[found];
- std::string platformName = PlatformInfo(platform).name();
+ std::string platformName = PlatformInfo(&platform).name();
OpenCLExecutionContext clExecCtx;
try
cl_context context = (cl_context)_context;
cl_device_id deviceID = (cl_device_id)_device;
- std::string platformName = PlatformInfo(platformID).name();
+ std::string platformName = PlatformInfo(&platformID).name();
auto clExecCtx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
CV_Assert(!clExecCtx.empty());
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for OpenGL interop");
cl_platform_id platform = platforms[found];
- std::string platformName = PlatformInfo(platform).name();
+ std::string platformName = PlatformInfo(&platform).name();
OpenCLExecutionContext clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
clReleaseDevice(device);
# include "opencl_kernels_core.hpp"
#endif // HAVE_OPENCL
-#if defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL)
+#ifdef HAVE_VA_INTEL
+#ifdef HAVE_VA_INTEL_OLD_HEADER
# include <CL/va_ext.h>
-#endif // HAVE_VA_INTEL && HAVE_OPENCL
+#else
+# include <CL/cl_va_api_media_sharing_intel.h>
+#endif
+#endif
namespace cv { namespace va_intel {
-#if defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL)
+#ifdef HAVE_VA_INTEL
static clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn clGetDeviceIDsFromVA_APIMediaAdapterINTEL = NULL;
static clCreateFromVA_APIMediaSurfaceINTEL_fn clCreateFromVA_APIMediaSurfaceINTEL = NULL;
static bool contextInitialized = false;
-#endif // HAVE_VA_INTEL && HAVE_OPENCL
+#endif // HAVE_VA_INTEL
namespace ocl {
#if !defined(HAVE_VA)
NO_VA_SUPPORT_ERROR;
#else // !HAVE_VA
-# if (defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL))
+# ifdef HAVE_VA_INTEL
contextInitialized = false;
if (tryInterop)
{
contextInitialized = true;
cl_platform_id platform = platforms[found];
- std::string platformName = PlatformInfo(platform).name();
+ std::string platformName = PlatformInfo(&platform).name();
OpenCLExecutionContext clExecCtx;
try
return const_cast<Context&>(clExecCtx.getContext());
}
}
-# endif // HAVE_VA_INTEL && HAVE_OPENCL
+# endif // HAVE_VA_INTEL
{
Context& ctx = Context::getDefault(true);
return ctx;
#endif // !HAVE_VA
}
-#if defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL)
+#ifdef HAVE_VA_INTEL
static bool ocl_convert_nv12_to_bgr(cl_mem clImageY, cl_mem clImageUV, cl_mem clBuffer, int step, int cols, int rows)
{
ocl::Kernel k;
size_t globalsize[] = { (size_t)cols, (size_t)rows };
return k.run(2, globalsize, 0, false);
}
-#endif // HAVE_VA_INTEL && HAVE_OPENCL
+#endif // HAVE_VA_INTEL
} // namespace cv::va_intel::ocl
Size srcSize = src.size();
CV_Assert(srcSize.width == size.width && srcSize.height == size.height);
-# if (defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL))
+#ifdef HAVE_VA_INTEL
if (contextInitialized)
{
UMat u = src.getUMat();
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
}
else
-# endif // HAVE_VA_INTEL && HAVE_OPENCL
+# endif // HAVE_VA_INTEL
{
Mat m = src.getMat();
// TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying!
dst.create(size, dtype);
-# if (defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL))
+#ifdef HAVE_VA_INTEL
if (contextInitialized)
{
UMat u = dst.getUMat();
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
}
else
-# endif // HAVE_VA_INTEL && HAVE_OPENCL
+# endif // HAVE_VA_INTEL
{
Mat m = dst.getMat();
set(sources_options ${sources_options} EXCLUDE_CUDA)
endif()
-
if(HAVE_TENGINE)
list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS})
- if(EXISTS ${TENGINE_LIBRARIES})
- list(APPEND libs ${TENGINE_LIBRARIES})
- else()
- ocv_add_dependencies(opencv_dnn tengine)
- list(APPEND libs ${TENGINE_LIBRARIES})
- endif()
+ list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive)
endif()
-
ocv_module_include_directories(${include_dirs})
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override") # GCC
bool isDeviceCompatible()
{
+ if (getDeviceCount() <= 0)
+ return false;
+
int device_id = getDevice();
if (device_id < 0)
return false;
bool doesDeviceSupportFP16()
{
+ if (getDeviceCount() <= 0)
+ return false;
+
int device_id = getDevice();
if (device_id < 0)
return false;
{
CV_TRACE_FUNCTION();
if (preferableBackend == DNN_BACKEND_OPENCV)
+ {
CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
+ }
else if (preferableBackend == DNN_BACKEND_HALIDE)
initHalideBackend();
else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
float power;
#endif
+#ifdef HAVE_TENGINE
+ teng_graph_t tengine_graph;
+#endif
+
#ifdef HAVE_CUDA
cuda4dnn::ConvolutionConfiguration::FusionMode cudaFusionMode;
cuda4dnn::ConvolutionConfiguration::ActivationType cudaActType;
cudaFusionMode = cuda4dnn::ConvolutionConfiguration::FusionMode::NONE;
cudaActType = cuda4dnn::ConvolutionConfiguration::ActivationType::IDENTITY;
#endif
+#ifdef HAVE_TENGINE
+ tengine_graph=NULL;
+#endif
}
+#ifdef HAVE_TENGINE
+ ~ConvolutionLayerImpl()
+ {
+ if(NULL != tengine_graph )
+ {
+ tengine_release(tengine_graph);
+ }
+ }
+#endif
MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
{
for(int i = 0; i < numOutput; i++ )
biasvec[i] = biasMat.at<float>(i);
}
+#ifdef HAVE_TENGINE
+ if(NULL != tengine_graph )
+ {
+ tengine_release(tengine_graph);
+ tengine_graph = NULL;
+ }
+#endif
#ifdef HAVE_OPENCL
convolutionOp.release();
#endif
}
#ifdef HAVE_TENGINE
- int inch = inputs[0].size[1]; // inch
- int in_h = inputs[0].size[2]; // in_h
- int in_w = inputs[0].size[3]; // in_w
+ bool tengine_ret = false;
+
+ std::vector<Mat> teng_in, teng_out;
+ inputs_arr.getMatVector(teng_in);
+ outputs_arr.getMatVector(teng_out);
+
+ int inch = teng_in[0].size[1]; // inch
+ int in_h = teng_in[0].size[2]; // in_h
+ int in_w = teng_in[0].size[3]; // in_w
- int out_b = outputs[0].size[0]; // out batch size
- int outch = outputs[0].size[1]; // outch
- int out_h = outputs[0].size[2]; // out_h
- int out_w = outputs[0].size[3]; // out_w
+ int out_b = teng_out[0].size[0]; // out batch size
+ int outch = teng_out[0].size[1]; // outch
+ int out_h = teng_out[0].size[2]; // out_h
+ int out_w = teng_out[0].size[3]; // out_w
- float *input_ = inputs[0].ptr<float>();
- float *output_ = outputs[0].ptr<float>();
+ float *input_ = teng_in[0].ptr<float>();
+ float *output_ = teng_out[0].ptr<float>();
float *kernel_ = weightsMat.ptr<float>();
float *teg_bias = &biasvec[0];
- bool tengine_ret = tengine_forward(input_, inch, ngroups, in_h, in_w,
- output_, out_b, outch, out_h, out_w,
- kernel_, kernel_size.size(), kernel.height, kernel.width,
- teg_bias, stride.height, stride.width,
- pad.height, pad.width, dilation.height, dilation.width,
- weightsMat.step1(), padMode);
+ int nstripes = std::max(getNumThreads(), 1);
+
+ /* tengine_init runs only the first time this layer executes. */
+ if(NULL == tengine_graph)
+ {
+ tengine_graph = tengine_init(name.c_str(), input_, inch, ngroups, in_h, in_w,
+ output_, out_b, outch, out_h, out_w,
+ kernel_, kernel_size.size(), kernel.height, kernel.width,
+ teg_bias, stride.height, stride.width,
+ pad.height, pad.width, dilation.height, dilation.width,
+ weightsMat.step1(), padMode, tengine_graph, nstripes);
+ /*printf("Init(%s): input=%p(%d %d %d %d ),output=%p(%d %d %d %d ),kernel=%p(%ld %d %d ), bias=%p ,"
+ "stride(%d %d), pad(%d %d), dilation(%d %d) ,weightsMat=%ld, padMode=%s ,tengine_graph = %p \n",
+ name.c_str(),input_, inch, ngroups, in_h, in_w,
+ output_, out_b, outch, out_h, out_w,
+ kernel_, kernel_size.size(), kernel.height, kernel.width,
+ teg_bias, stride.height, stride.width,
+ pad.height, pad.width, dilation.height, dilation.width,
+ weightsMat.step1(), padMode.c_str() ,tengine_graph);*/
+ }
+ if(NULL != tengine_graph)
+ {
+ tengine_ret = tengine_forward(tengine_graph);
+ }
/* activation */
if((true == tengine_ret) && activ )
{
#define TENGINE_GRAPH_CONVOLUTION_HPP
#define FLOAT_TO_REALSIZE (4)
+#ifdef HAVE_TENGINE
+
+#include "tengine_c_api.h"
namespace cv
{
namespace dnn
{
-bool tengine_forward(float *input_, int inch, int group, int in_h, int in_w,
+teng_graph_t tengine_init(const char* name , float* input_, int inch, int group, int in_h, int in_w,
float *output_, int out_b, int outch, int out_h, int out_w,
float *kernel_,int kernel_s , int kernel_h, int kernel_w,
float *teg_bias, int stride_h,int stride_w,
int pad_h, int pad_w, int dilation_h, int dilation_w,
- size_t wstep, const std::string padMode) ;
+ size_t wstep, const std::string padMode, teng_graph_t& graph, int nstripes);
+
+bool tengine_forward(teng_graph_t& graph);
+bool tengine_release(teng_graph_t& graph);
}
}
-#endif /* TENGINE_GRAPH_CONVOLUTION_HPP */
+#endif
+#endif /* TENGINE_GRAPH_CONVOLUTION_HPP */
\ No newline at end of file
#ifdef HAVE_TENGINE
#include "tengine_c_api.h"
-#include "tengine_c_compat.h"
-#include "tengine_operations.h"
+
namespace cv
{
namespace dnn
{
-
-int create_input_node(graph_t graph, const char* node_name, int inch, int in_h, int in_w)
+static int create_input_node(teng_graph_t graph, const char* node_name, int inch, int in_h, int in_w)
{
- node_t node = create_graph_node(graph, node_name, "InputOp");
- tensor_t tensor = create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
- set_node_output_tensor(node, 0, tensor, TENSOR_TYPE_INPUT);
+ node_t node = teng_create_graph_node(graph, node_name, "InputOp");
+ tensor_t tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
+ teng_set_node_output_tensor(node, 0, tensor, TENSOR_TYPE_INPUT);
int dims[4] = {1, inch, in_h, in_w};
- set_tensor_shape(tensor, dims, 4);
+ teng_set_tensor_shape(tensor, dims, 4);
- release_graph_tensor(tensor);
- release_graph_node(node);
+ teng_release_graph_tensor(tensor);
+ teng_release_graph_node(node);
return 0;
}
-int create_conv_node(graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w,
+static int create_conv_node(teng_graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w,
int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int inch, int outch, int group,
int dilation_h, int dilation_w, int activation, std::string padMode)
{
- node_t conv_node = create_graph_node(graph, node_name, "Convolution");
- tensor_t input_tensor = get_graph_tensor(graph, input_name);
+ node_t conv_node = teng_create_graph_node(graph, node_name, "Convolution");
+ tensor_t input_tensor = teng_get_graph_tensor(graph, input_name);
if (input_tensor == NULL)
{
- CV_LOG_WARNING(NULL,"Tengine :input_tensor is NULL . " );
+ CV_LOG_WARNING(NULL,"Tengine: input_tensor is NULL." );
return -1;
}
- set_node_input_tensor(conv_node, 0, input_tensor);
- release_graph_tensor(input_tensor);
+ teng_set_node_input_tensor(conv_node, 0, input_tensor);
+ teng_release_graph_tensor(input_tensor);
/* output */
- tensor_t output_tensor = create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
+ tensor_t output_tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
- set_node_output_tensor(conv_node, 0, output_tensor, TENSOR_TYPE_VAR);
- release_graph_tensor(output_tensor);
+ teng_set_node_output_tensor(conv_node, 0, output_tensor, TENSOR_TYPE_VAR);
+ teng_release_graph_tensor(output_tensor);
/* weight */
std::string weight_name(node_name);
weight_name += "/weight";
- node_t w_node = create_graph_node(graph, weight_name.c_str(), "Const");
- tensor_t w_tensor = create_graph_tensor(graph, weight_name.c_str(), TENGINE_DT_FP32);
- set_node_output_tensor(w_node, 0, w_tensor, TENSOR_TYPE_CONST);
- set_node_input_tensor(conv_node, 1, w_tensor);
+ node_t w_node = teng_create_graph_node(graph, weight_name.c_str(), "Const");
+ tensor_t w_tensor = teng_create_graph_tensor(graph, weight_name.c_str(), TENGINE_DT_FP32);
+ teng_set_node_output_tensor(w_node, 0, w_tensor, TENSOR_TYPE_CONST);
+ teng_set_node_input_tensor(conv_node, 1, w_tensor);
int w_dims[] = {outch, inch / group, kernel_h, kernel_w};
- set_tensor_shape(w_tensor, w_dims, 4);
+ teng_set_tensor_shape(w_tensor, w_dims, 4);
- release_graph_node(w_node);
- release_graph_tensor(w_tensor);
+ teng_release_graph_node(w_node);
+ teng_release_graph_tensor(w_tensor);
/* bias */
std::string bias_name(node_name);
bias_name += "/bias";
- node_t b_node = create_graph_node(graph, bias_name.c_str(), "Const");
- tensor_t b_tensor = create_graph_tensor(graph, bias_name.c_str(), TENGINE_DT_FP32);
- set_node_output_tensor(b_node, 0, b_tensor, TENSOR_TYPE_CONST);
+ node_t b_node = teng_create_graph_node(graph, bias_name.c_str(), "Const");
+ tensor_t b_tensor = teng_create_graph_tensor(graph, bias_name.c_str(), TENGINE_DT_FP32);
+ teng_set_node_output_tensor(b_node, 0, b_tensor, TENSOR_TYPE_CONST);
int b_dims[] = {outch};
- set_tensor_shape(b_tensor, b_dims, 1);
+ teng_set_tensor_shape(b_tensor, b_dims, 1);
- set_node_input_tensor(conv_node, 2, b_tensor);
- release_graph_node(b_node);
- release_graph_tensor(b_tensor);
+ teng_set_node_input_tensor(conv_node, 2, b_tensor);
+ teng_release_graph_node(b_node);
+ teng_release_graph_tensor(b_tensor);
int pad_h1 = pad_h;
int pad_w1 = pad_w;
}
/* attr */
- set_node_attr_int(conv_node, "kernel_h", &kernel_h);
- set_node_attr_int(conv_node, "kernel_w", &kernel_w);
- set_node_attr_int(conv_node, "stride_h", &stride_h);
- set_node_attr_int(conv_node, "stride_w", &stride_w);
- set_node_attr_int(conv_node, "pad_h0", &pad_h);
- set_node_attr_int(conv_node, "pad_w0", &pad_w);
- set_node_attr_int(conv_node, "pad_h1", &pad_h1);
- set_node_attr_int(conv_node, "pad_w1", &pad_w1);
- set_node_attr_int(conv_node, "output_channel", &outch);
- set_node_attr_int(conv_node, "group", &group);
- set_node_attr_int(conv_node, "dilation_h", &dilation_h);
- set_node_attr_int(conv_node, "dilation_w", &dilation_w);
- set_node_attr_int(conv_node, "activation", &activation);
-
- release_graph_node(conv_node);
+ teng_set_node_attr_int(conv_node, "kernel_h", &kernel_h);
+ teng_set_node_attr_int(conv_node, "kernel_w", &kernel_w);
+ teng_set_node_attr_int(conv_node, "stride_h", &stride_h);
+ teng_set_node_attr_int(conv_node, "stride_w", &stride_w);
+ teng_set_node_attr_int(conv_node, "pad_h0", &pad_h);
+ teng_set_node_attr_int(conv_node, "pad_w0", &pad_w);
+ teng_set_node_attr_int(conv_node, "pad_h1", &pad_h1);
+ teng_set_node_attr_int(conv_node, "pad_w1", &pad_w1);
+ teng_set_node_attr_int(conv_node, "output_channel", &outch);
+ teng_set_node_attr_int(conv_node, "input_channel", &inch);
+ teng_set_node_attr_int(conv_node, "group", &group);
+ teng_set_node_attr_int(conv_node, "dilation_h", &dilation_h);
+ teng_set_node_attr_int(conv_node, "dilation_w", &dilation_w);
+ // set_node_attr_int(conv_node, "activation", &activation);
+
+ teng_release_graph_node(conv_node);
return 0;
}
-graph_t create_conv_graph(float *input_data, int inch, int group, int in_h, int in_w,
- float *output_data, int outch, int out_h, int out_w,
+static teng_graph_t create_conv_graph(const char* layer_name, float* input_data, int inch, int group, int in_h, int in_w,
+ float* output_data, int outch, int out_h, int out_w,
int kernel_h, int kernel_w,
int stride_h,int stride_w,
int pad_h, int pad_w, int dilation_h, int dilation_w, int activation,
- float * teg_weight , float * teg_bias , std::string padMode)
+ float* teg_weight, float* teg_bias, std::string padMode, int nstripes)
{
node_t conv_node = NULL;
int input_num = 0;
/* create graph */
- graph_t graph = create_graph(NULL, NULL, NULL);
+ teng_graph_t graph = teng_create_graph(NULL, NULL, NULL);
bool ok = true;
if(graph == NULL)
{
- CV_LOG_WARNING(NULL,"Tengine :create_graph failed . " );
+ CV_LOG_WARNING(NULL,"Tengine: create_graph failed." );
ok = false;
}
const char* input_name = "data";
- const char* conv_name = "conv";
+ const char* conv_name = layer_name;
if (ok && create_input_node(graph, input_name, inch, in_h, in_w) < 0)
{
- CV_LOG_WARNING(NULL,"Tengine :create_input_node failed. " );
+ CV_LOG_WARNING(NULL,"Tengine: create_input_node failed." );
ok = false;
}
if (ok && create_conv_node(graph, conv_name, input_name, in_h, in_w, out_h, out_w, kernel_h, kernel_w,
stride_h, stride_w, pad_h, pad_w, inch, outch, group, dilation_h, dilation_w, activation, padMode) < 0)
{
- CV_LOG_WARNING(NULL,"Tengine :create conv node failed. " );
+ CV_LOG_WARNING(NULL,"Tengine: create conv node failed." );
ok = false;
}
const char* inputs_name[] = {input_name};
const char* outputs_name[] = {conv_name};
- if (ok && set_graph_input_node(graph, inputs_name, sizeof(inputs_name) / sizeof(char*)) < 0)
+ if (ok && teng_set_graph_input_node(graph, inputs_name, sizeof(inputs_name) / sizeof(char*)) < 0)
{
- CV_LOG_WARNING(NULL,"Tengine :set inputs failed . " );
+ CV_LOG_WARNING(NULL,"Tengine: set inputs failed." );
ok = false;
}
- if (ok && set_graph_output_node(graph, outputs_name, sizeof(outputs_name) / sizeof(char*)) < 0)
+ if (ok && teng_set_graph_output_node(graph, outputs_name, sizeof(outputs_name) / sizeof(char*)) < 0)
{
- CV_LOG_WARNING(NULL,"Tengine :set outputs failed . " );
+ CV_LOG_WARNING(NULL,"Tengine: set outputs failed." );
ok = false;
}
/* set input data */
if (ok)
{
- input_tensor = get_graph_input_tensor(graph, 0, 0);
- buf_size = get_tensor_buffer_size(input_tensor);
+ input_tensor = teng_get_graph_input_tensor(graph, 0, 0);
+ buf_size = teng_get_tensor_buffer_size(input_tensor);
if (buf_size != in_size * FLOAT_TO_REALSIZE)
{
- CV_LOG_WARNING(NULL,"Tengine :Input data size check failed . ");
+ CV_LOG_WARNING(NULL,"Tengine: Input data size check failed.");
ok = false;
}
}
if (ok)
{
- set_tensor_buffer(input_tensor, (float *)input_data, buf_size);
- release_graph_tensor(input_tensor);
+ teng_set_tensor_buffer(input_tensor, (float *)input_data, buf_size);
+ teng_release_graph_tensor(input_tensor);
/* create convolution node */
/* set weight node */
- conv_node = get_graph_node(graph, "conv");
- weight_tensor = get_node_input_tensor(conv_node, 1);
- buf_size = get_tensor_buffer_size(weight_tensor);
+ conv_node = teng_get_graph_node(graph, conv_name);
+ weight_tensor = teng_get_node_input_tensor(conv_node, 1);
+ buf_size = teng_get_tensor_buffer_size(weight_tensor);
if (buf_size != weight_size * FLOAT_TO_REALSIZE)
{
- CV_LOG_WARNING(NULL,"Input weight size check failed . ");
+ CV_LOG_WARNING(NULL,"Tengine: Input weight size check failed.");
ok = false;
}
}
if (ok)
{
- set_tensor_buffer(weight_tensor, teg_weight, buf_size);
+ teng_set_tensor_buffer(weight_tensor, teg_weight, buf_size);
/* set bias node */
- input_num = get_node_input_number(conv_node);
+ input_num = teng_get_node_input_number(conv_node);
if (input_num > 2)
{
- bias_tensor = get_node_input_tensor(conv_node, 2);
- buf_size = get_tensor_buffer_size(bias_tensor);
+ bias_tensor = teng_get_node_input_tensor(conv_node, 2);
+ buf_size = teng_get_tensor_buffer_size(bias_tensor);
if (buf_size != bias_size * FLOAT_TO_REALSIZE)
{
- CV_LOG_WARNING(NULL,"Tengine :Input bias size check failed . ");
+ CV_LOG_WARNING(NULL,"Tengine: Input bias size check failed.");
ok = false;
}
- else set_tensor_buffer(bias_tensor, teg_bias, buf_size);
+ else teng_set_tensor_buffer(bias_tensor, teg_bias, buf_size);
}
}
+ /* prerun */
+ if (ok && teng_prerun_graph_multithread(graph, TENGINE_CLUSTER_BIG, nstripes) < 0)
+ {
+ CV_LOG_WARNING(NULL, "Tengine: prerun_graph failed.");
+ ok = false;
+ }
+
if (ok)
{
/* set output data */
- output_tensor = get_node_output_tensor(conv_node, 0);
- int ret = set_tensor_buffer(output_tensor, output_data, out_size * FLOAT_TO_REALSIZE);
+ output_tensor = teng_get_node_output_tensor(conv_node, 0);
+ int ret = teng_set_tensor_buffer(output_tensor, output_data, out_size * FLOAT_TO_REALSIZE);
if(ret)
{
- CV_LOG_WARNING(NULL,"Tengine :Set output tensor buffer failed . " );
+ CV_LOG_WARNING(NULL,"Tengine: Set output tensor buffer failed." );
+ ok = false;
}
}
- if (!ok)
+ if (false == ok)
{
- destroy_graph(graph);
- return NULL;
+ teng_destroy_graph(graph);
+ return NULL;
}
return graph;
}
-
-bool tengine_forward(float *input_, int inch, int group, int in_h, int in_w,
+static bool tengine_init_flag = false;
+teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int group, int in_h, int in_w,
float *output_, int out_b, int outch, int out_h, int out_w,
float *kernel_, int kernel_s ,int kernel_h, int kernel_w,
float *teg_bias, int stride_h,int stride_w,
int pad_h, int pad_w, int dilation_h, int dilation_w,
- size_t wstep,const std::string padMode)
+ size_t wstep, const std::string padMode, teng_graph_t &graph, int nstripes)
{
- graph_t graph = NULL;
std::vector<float> teg_weight_vec;
float *teg_weight = NULL;
int kernel_inwh = (inch / group) * kernel_w * kernel_h;
if (!(kernel_s == 2 && kernel_h == kernel_w && pad_h == pad_w
&& dilation_h == dilation_w && stride_h == stride_w
&& out_b == 1 && pad_h < 10)) // just for Conv2D
- return false;
+ {
+ // printf("return : just for Conv2D\n");
+ return NULL;
+ }
{
- /*printf("Tengine: input (1 x %d x %d x %d),output (%d x %d x %d x %d), kernel (%d x %d), stride (%d x %d), dilation (%d x %d), pad (%d x %d).\n",
- inch, in_h, in_w,
- out_b,outch,out_h,out_w,
+ /* printf("Tengine(%s): input (1 x %d x %d x %d),output (%d x %d x %d x %d), kernel (%d x %d), stride (%d x %d), dilation (%d x %d), pad (%d x %d).\n",
+ layer_name, inch, in_h, in_w,
+ out_b, outch, out_h, out_w,
kernel_w, kernel_h,
stride_w, stride_h,
dilation_w, dilation_h,
- pad_w,pad_h);*/
-
+ pad_w, pad_h);
+ */
// weight
if (kernel_inwh != wstep)
{
}
/* initial the resoruce of tengine */
- init_tengine();
+ if(false == tengine_init_flag)
+ {
+ init_tengine();
+ tengine_init_flag = true;
+ }
/* create the convolution graph */
- graph = create_conv_graph( input_, inch, group, in_h, in_w,
+ graph = create_conv_graph(layer_name, input_, inch, group, in_h, in_w,
output_, outch, out_h, out_w,
kernel_h, kernel_w, stride_h,stride_w,
pad_h, pad_w, dilation_h, dilation_w, activation,
- teg_weight , teg_bias , padMode);
-
- /* prerun */
- if(prerun_graph(graph) < 0)
+ teg_weight, teg_bias, padMode, nstripes);
+ if(NULL == graph )
{
- CV_LOG_WARNING(NULL, "Tengine :prerun_graph failed .");
- return false ;
+ return NULL;
}
-
- /* run */
- if(run_graph(graph, 1) < 0)
- {
- CV_LOG_WARNING(NULL,"Tengine :run_graph failed .");
- return false ;
- }
-
- postrun_graph(graph);
- destroy_graph(graph);
}
- return true ;
+ return graph ;
}
+bool tengine_forward(teng_graph_t &graph)
+{
+ /* run */
+ if(teng_run_graph(graph, 1) < 0)
+ {
+ CV_LOG_WARNING(NULL,"Tengine: run_graph failed.");
+ return false ;
+ }
+ return true;
+}
+bool tengine_release(teng_graph_t &graph)
+{
+ teng_postrun_graph(graph);
+ teng_destroy_graph(graph);
+ return true;
+}
}
}
#endif
#if INF_ENGINE_RELEASE >= 2020010000
// Downloaded using these parameters for Open Model Zoo downloader (2020.1):
// ./downloader.py -o ${OPENCV_DNN_TEST_DATA_PATH}/omz_intel_models --cache_dir ${OPENCV_DNN_TEST_DATA_PATH}/.omz_cache/ \
- // --name person-detection-retail-0013
+ // --name person-detection-retail-0013,age-gender-recognition-retail-0013
{ "person-detection-retail-0013", { // IRv10
"intel/person-detection-retail-0013/FP32/person-detection-retail-0013",
"intel/person-detection-retail-0013/FP16/person-detection-retail-0013"
}},
+ { "age-gender-recognition-retail-0013", {
+ "intel/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013",
+ "intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013"
+ }},
#endif
};
return result;
}
+inline static std::string getOpenVINOModel(const std::string &modelName, bool isFP16)
+{
+ const std::map<std::string, OpenVINOModelTestCaseInfo>& models = getOpenVINOTestModels();
+ const auto it = models.find(modelName);
+ if (it != models.end())
+ {
+ OpenVINOModelTestCaseInfo modelInfo = it->second;
+ if (isFP16 && modelInfo.modelPathFP16)
+ return std::string(modelInfo.modelPathFP16);
+ else if (!isFP16 && modelInfo.modelPathFP32)
+ return std::string(modelInfo.modelPathFP32);
+ }
+ return std::string();
+}
+
static inline void genData(const InferenceEngine::TensorDesc& desc, Mat& m, Blob::Ptr& dataPtr)
{
const std::vector<size_t>& dims = desc.getDims();
bool isFP16 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD);
- const std::map<std::string, OpenVINOModelTestCaseInfo>& models = getOpenVINOTestModels();
- const auto it = models.find(modelName);
- ASSERT_TRUE(it != models.end()) << modelName;
- OpenVINOModelTestCaseInfo modelInfo = it->second;
- std::string modelPath = isFP16 ? modelInfo.modelPathFP16 : modelInfo.modelPathFP32;
+ const std::string modelPath = getOpenVINOModel(modelName, isFP16);
+ ASSERT_FALSE(modelPath.empty()) << modelName;
std::string xmlPath = findDataFile(modelPath + ".xml", false);
std::string binPath = findDataFile(modelPath + ".bin", false);
Target target = (dnn::Target)(int)GetParam();
bool isFP16 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD);
-
- OpenVINOModelTestCaseInfo modelInfo = getOpenVINOTestModels().find("age-gender-recognition-retail-0013")->second;
-
- std::string modelPath = isFP16 ? modelInfo.modelPathFP16 : modelInfo.modelPathFP32;
+ const std::string modelName = "age-gender-recognition-retail-0013";
+ const std::string modelPath = getOpenVINOModel(modelName, isFP16);
+ ASSERT_FALSE(modelPath.empty()) << modelName;
std::string xmlPath = findDataFile(modelPath + ".xml");
std::string binPath = findDataFile(modelPath + ".bin");
src/api/garray.cpp
src/api/gopaque.cpp
src/api/gscalar.cpp
+ src/api/gframe.cpp
src/api/gkernel.cpp
src/api/gbackend.cpp
src/api/gproto.cpp
endif()
target_link_libraries(${FLUID_TARGET} PRIVATE ade)
+
+if(WIN32)
+ # Required for htonl/ntohl on Windows
+ target_link_libraries(${FLUID_TARGET} PRIVATE wsock32 ws2_32)
+endif()
-#+TITLE: OpenCV 4.0 Graph API
+#+TITLE: OpenCV 4.4 Graph API
#+AUTHOR: Dmitry Matveev\newline Intel Corporation
#+OPTIONS: H:2 toc:t num:t
#+LATEX_CLASS: beamer
#+LATEX_CLASS_OPTIONS: [presentation]
#+LATEX_HEADER: \usepackage{transparent} \usepackage{listings} \usepackage{pgfplots} \usepackage{mtheme.sty/beamerthememetropolis}
-#+LATEX_HEADER: \setbeamertemplate{frame footer}{OpenCV 4.0 G-API: Overview and programming by example}
+#+LATEX_HEADER: \setbeamertemplate{frame footer}{OpenCV 4.4 G-API: Overview and programming by example}
#+BEAMER_HEADER: \subtitle{Overview and programming by example}
#+BEAMER_HEADER: \titlegraphic{ \vspace*{3cm}\hspace*{5cm} {\transparent{0.2}\includegraphics[height=\textheight]{ocv_logo.eps}}}
#+COLUMNS: %45ITEM %10BEAMER_ENV(Env) %10BEAMER_ACT(Act) %4BEAMER_COL(Col) %8BEAMER_OPT(Opt)
- OpenCV meets C++, ~cv::Mat~ replaces ~IplImage*~;
-*** Version 3.0: -- Welcome Transparent API (T-API)
+*** Version 3.0 -- Welcome Transparent API (T-API)
- ~cv::UMat~ is introduced as a /transparent/ addition to
~cv::Mat~;
** OpenCV evolution in one slide (cont'd)
# FIXME: Learn proper page-breaking!
-*** Version 4.0: -- Welcome Graph API (G-API)
+*** Version 4.0 -- Welcome Graph API (G-API)
- A new separate module (not a full library rewrite);
- A framework (or even a /meta/-framework);
- Kernels can be written in unconstrained platform-native code;
- Halide can serve as a backend (one of many).
+** OpenCV evolution in one slide (cont'd)
+# FIXME: Learn proper page-breaking!
+
+*** Version 4.2 -- New horizons
+
+- Introduced in-graph inference via OpenVINO™ Toolkit;
+- Introduced video-oriented Streaming execution mode;
+- Extended focus from individual image processing to the full
+ application pipeline optimization.
+
+*** Version 4.4 -- More on video
+
+- Introduced a notion of stateful kernels;
+ - The road to object tracking, background subtraction, etc. in the
+ graph;
+- Added more video-oriented operations (feature detection, Optical
+ flow).
+
** Why G-API?
*** Why introduce a new execution model?
- *Heterogeneity* gets extra benefits like:
- Avoiding unnecessary data transfers;
- Shadowing transfer costs with parallel host co-execution;
- - Increasing system throughput with frame-level pipelining.
+ - Improving system throughput with frame-level pipelining.
* Programming with G-API
- What data objects are /inputs/ to the graph?
- What are its /outputs/?
-** A code is worth a thousand words
+** The code is worth a thousand words
+ :PROPERTIES:
+ :BEAMER_opt: shrink=42
+ :END:
+
+#+BEGIN_SRC C++
+#include <opencv2/gapi.hpp> // G-API framework header
+#include <opencv2/gapi/imgproc.hpp> // cv::gapi::blur()
+#include <opencv2/highgui.hpp> // cv::imread/imwrite
+
+int main(int argc, char *argv[]) {
+ if (argc < 3) return 1;
+
+ cv::GMat in; // Express the graph:
+ cv::GMat out = cv::gapi::blur(in, cv::Size(3,3)); // `out` is a result of `blur` of `in`
+
+ cv::Mat in_mat = cv::imread(argv[1]); // Get the real data
+ cv::Mat out_mat; // Output buffer (may be empty)
+
+ cv::GComputation(cv::GIn(in), cv::GOut(out)) // Declare a graph from `in` to `out`
+ .apply(cv::gin(in_mat), cv::gout(out_mat)); // ...and run it immediately
+
+ cv::imwrite(argv[2], out_mat); // Save the result
+ return 0;
+}
+#+END_SRC
+
+** The code is worth a thousand words
:PROPERTIES:
:BEAMER_opt: shrink=42
:END:
}
#+END_SRC
-** A code is worth a thousand words (cont'd)
+** The code is worth a thousand words (cont'd)
# FIXME: sections!!!
*** What we have just learned?
** On data objects
Graph *protocol* defines what arguments a computation was defined on
- (both inputs and outputs), and what are the *shapes* (or types) of
- those arguments:
-
- | *Shape* | *Argument* | Size |
- |-------------+------------------+-----------------------------|
- | ~GMat~ | ~Mat~ | Static; defined during |
- | | | graph compilation |
- |-------------+------------------+-----------------------------|
- | ~GScalar~ | ~Scalar~ | 4 x ~double~ |
- |-------------+------------------+-----------------------------|
- | ~GArray<T>~ | ~std::vector<T>~ | Dynamic; defined in runtime |
+(both inputs and outputs), and what are the *shapes* (or types) of
+those arguments:
+
+ | *Shape* | *Argument* | Size |
+ |--------------+------------------+-----------------------------|
+ | ~GMat~ | ~Mat~ | Static; defined during |
+ | | | graph compilation |
+ |--------------+------------------+-----------------------------|
+ | ~GScalar~ | ~Scalar~ | 4 x ~double~ |
+ |--------------+------------------+-----------------------------|
+ | ~GArray<T>~ | ~std::vector<T>~ | Dynamic; defined in runtime |
+ |--------------+------------------+-----------------------------|
+ | ~GOpaque<T>~ | ~T~ | Static, ~sizeof(T)~ |
~GScalar~ may be value-initialized at construction time to allow
expressions like ~GMat a = 2*(b + 1)~.
-** Customization example
+** On operations and kernels
+ :PROPERTIES:
+ :BEAMER_opt: shrink=22
+ :END:
+
+*** :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_col: 0.45
+ :END:
+
+- Graphs are built with *Operations* over virtual *Data*;
+- *Operations* define interfaces (literally);
+- *Kernels* are implementations to *Operations* (like in OOP);
+- An *Operation* is platform-agnostic, a *kernel* is not;
+- *Kernels* are implemented for *Backends*, the latter provide
+ APIs to write kernels;
+- Users can /add/ their *own* operations and kernels,
+ and also /redefine/ "standard" kernels their *own* way.
-*** Tuning the execution
+*** :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_col: 0.45
+ :END:
-- Graph execution model is defined by kernels which are used;
-- Kernels can be specified in graph compilation arguments:
- #+LaTeX: {\footnotesize
- #+BEGIN_SRC C++
- #include <opencv2/gapi/fluid/core.hpp>
- #include <opencv2/gapi/fluid/imgproc.hpp>
- ...
- auto pkg = gapi::combine(gapi::core::fluid::kernels(),
- gapi::imgproc::fluid::kernels(),
- cv::unite_policy::KEEP);
- sobel.apply(in_mat, out_mat, compile_args(pkg));
- #+END_SRC
- #+LaTeX: }
-- OpenCL backend can be used in the same way;
- #+LaTeX: {\footnotesize
-- *NOTE*: ~cv::unite_policy~ has been removed in OpenCV 4.1.1.
- #+LaTeX: }
+#+BEGIN_SRC dot :file "000-ops-kernels.eps" :cmdline "-Kdot -Teps"
+digraph G {
+node [shape=box];
+rankdir=BT;
-** Operations and Kernels
+Gr [label="Graph"];
+Op [label="Operation\nA"];
+{rank=same
+Impl1 [label="Kernel\nA:2"];
+Impl2 [label="Kernel\nA:1"];
+}
-*** Specifying a kernel package
+Op -> Gr [dir=back, label="'consists of'"];
+Impl1 -> Op [];
+Impl2 -> Op [label="'is implemented by'"];
-- A *kernel* is an implementation of *operation* (= interface);
-- A *kernel package* hosts kernels that G-API should use;
-- Kernels are written for different *backends* and using their APIs;
-- Two kernel packages can be *merged* into a single one;
-- User can safely supply his *own kernels* to either /replace/ or
- /augment/ the default package.
- - Yes, even the standard kernels can be /overwritten/ by user from
- the outside!
-- *Heterogeneous* kernel package hosts kernels of different backends.
+node [shape=note,style=dashed];
+{rank=same
+Op;
+CommentOp [label="Abstract:\ndeclared via\nG_API_OP()"];
+}
+{rank=same
+Comment1 [label="Platform:\ndefined with\nOpenCL backend"];
+Comment2 [label="Platform:\ndefined with\nOpenCV backend"];
+}
-** Operations and Kernels (cont'd)
-# FIXME!!!
+CommentOp -> Op [constraint=false, style=dashed, arrowhead=none];
+Comment1 -> Impl1 [style=dashed, arrowhead=none];
+Comment2 -> Impl2 [style=dashed, arrowhead=none];
+}
+#+END_SRC
+
+** On operations and kernels (cont'd)
*** Defining an operation
- Metadata callback -- describe what is the output value format(s),
given the input and arguments.
- Use ~OpType::on(...)~ to use a new kernel ~OpType~ to construct graphs.
+
#+LaTeX: {\footnotesize
#+BEGIN_SRC C++
-G_TYPED_KERNEL(GSqrt,<GMat(GMat)>,"org.opencv.core.math.sqrt") {
+G_API_OP(GSqrt,<GMat(GMat)>,"org.opencv.core.math.sqrt") {
static GMatDesc outMeta(GMatDesc in) { return in; }
};
#+END_SRC
#+LaTeX: }
-** Operations and Kernels (cont'd)
-# FIXME!!!
+** On operations and kernels (cont'd)
+
+*** ~GSqrt~ vs. ~cv::gapi::sqrt()~
+
+- How does a *type* relate to the *functions* from the example?
+- These functions are just wrappers over ~::on~:
+ #+LaTeX: {\scriptsize
+ #+BEGIN_SRC C++
+ G_API_OP(GSqrt,<GMat(GMat)>,"org.opencv.core.math.sqrt") {
+ static GMatDesc outMeta(GMatDesc in) { return in; }
+ };
+ GMat gapi::sqrt(const GMat& src) { return GSqrt::on(src); }
+ #+END_SRC
+ #+LaTeX: }
+- Why -- Doxygen, default parameters, 1:n mapping:
+ #+LaTeX: {\scriptsize
+ #+BEGIN_SRC C++
+ cv::GMat custom::unsharpMask(const cv::GMat &src,
+ const int sigma,
+ const float strength) {
+ cv::GMat blurred = cv::gapi::medianBlur(src, sigma);
+ cv::GMat laplacian = cv::gapi::Laplacian(blurred, CV_8U);
+ return (src - (laplacian * strength));
+ }
+ #+END_SRC
+ #+LaTeX: }
+
+** On operations and kernels (cont'd)
*** Implementing an operation
- Note ~run~ changes signature but still is derived from the operation
signature.
+** Operations and Kernels (cont'd)
+
+*** Specifying which kernels to use
+
+- Graph execution model is defined by kernels which are available/used;
+- Kernels can be specified via the graph compilation arguments:
+ #+LaTeX: {\footnotesize
+ #+BEGIN_SRC C++
+ #include <opencv2/gapi/fluid/core.hpp>
+ #include <opencv2/gapi/fluid/imgproc.hpp>
+ ...
+ auto pkg = cv::gapi::combine(cv::gapi::core::fluid::kernels(),
+ cv::gapi::imgproc::fluid::kernels());
+ sobel.apply(in_mat, out_mat, cv::compile_args(pkg));
+ #+END_SRC
+ #+LaTeX: }
+- Users can combine kernels of different backends and G-API will partition
+ the execution among those automatically.
+
+** Heterogeneity in G-API
+ :PROPERTIES:
+ :BEAMER_opt: shrink=35
+ :END:
+*** Automatic subgraph partitioning in G-API
+*** :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_col: 0.18
+ :END:
+
+#+BEGIN_SRC dot :file "010-hetero-init.eps" :cmdline "-Kdot -Teps"
+digraph G {
+rankdir=TB;
+ranksep=0.3;
+
+node [shape=box margin=0 height=0.25];
+A; B; C;
+
+node [shape=ellipse];
+GMat0;
+GMat1;
+GMat2;
+GMat3;
+
+GMat0 -> A -> GMat1 -> B -> GMat2;
+GMat2 -> C;
+GMat0 -> C -> GMat3
+
+subgraph cluster {style=invis; A; GMat1; B; GMat2; C};
+}
+#+END_SRC
+
+The initial graph: operations are not resolved yet.
+
+*** :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_col: 0.18
+ :END:
+
+#+BEGIN_SRC dot :file "011-hetero-homo.eps" :cmdline "-Kdot -Teps"
+digraph G {
+rankdir=TB;
+ranksep=0.3;
+
+node [shape=box margin=0 height=0.25];
+A; B; C;
+
+node [shape=ellipse];
+GMat0;
+GMat1;
+GMat2;
+GMat3;
+
+GMat0 -> A -> GMat1 -> B -> GMat2;
+GMat2 -> C;
+GMat0 -> C -> GMat3
+
+subgraph cluster {style=filled;color=azure2; A; GMat1; B; GMat2; C};
+}
+#+END_SRC
+
+All operations are handled by the same backend.
+
+*** :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_col: 0.18
+ :END:
+
+#+BEGIN_SRC dot :file "012-hetero-a.eps" :cmdline "-Kdot -Teps"
+digraph G {
+rankdir=TB;
+ranksep=0.3;
+
+node [shape=box margin=0 height=0.25];
+A; B; C;
+
+node [shape=ellipse];
+GMat0;
+GMat1;
+GMat2;
+GMat3;
+
+GMat0 -> A -> GMat1 -> B -> GMat2;
+GMat2 -> C;
+GMat0 -> C -> GMat3
+
+subgraph cluster_1 {style=filled;color=azure2; A; GMat1; B; }
+subgraph cluster_2 {style=filled;color=ivory2; C};
+}
+#+END_SRC
+
+~A~ & ~B~ are of backend ~1~, ~C~ is of backend ~2~.
+
+*** :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_col: 0.18
+ :END:
+
+#+BEGIN_SRC dot :file "013-hetero-b.eps" :cmdline "-Kdot -Teps"
+digraph G {
+rankdir=TB;
+ranksep=0.3;
+
+node [shape=box margin=0 height=0.25];
+A; B; C;
+
+node [shape=ellipse];
+GMat0;
+GMat1;
+GMat2;
+GMat3;
+
+GMat0 -> A -> GMat1 -> B -> GMat2;
+GMat2 -> C;
+GMat0 -> C -> GMat3
+
+subgraph cluster_1 {style=filled;color=azure2; A};
+subgraph cluster_2 {style=filled;color=ivory2; B};
+subgraph cluster_3 {style=filled;color=azure2; C};
+}
+#+END_SRC
+
+~A~ & ~C~ are of backend ~1~, ~B~ is of backend ~2~.
+
+** Heterogeneity in G-API
+
+*** Heterogeneity summary
+
+- G-API automatically partitions its graph in subgraphs (called "islands")
+ based on the available kernels;
+- Adjacent kernels taken from the same backend are "fused" into the same
+ "island";
+- G-API implements a two-level execution model:
+ - Islands are executed at the top level by a G-API's *Executor*;
+ - Island internals are run at the bottom level by its *Backend*;
+- G-API fully delegates the low-level execution and memory management to backends.
+
+* Inference and Streaming
+
+** Inference with G-API
+
+*** In-graph inference example
+
+- Starting with OpenCV 4.2 (2019), G-API allows integrating ~infer~
+ operations into the graph:
+ #+LaTeX: {\scriptsize
+ #+BEGIN_SRC C++
+ G_API_NET(ObjDetect, <cv::GMat(cv::GMat)>, "pdf.example.od");
+
+ cv::GMat bgr;
+ cv::GMat blob = cv::gapi::infer<ObjDetect>(bgr);
+ cv::GOpaque<cv::Size> size = cv::gapi::streaming::size(bgr);
+ cv::GArray<cv::Rect> objs = cv::gapi::streaming::parseSSD(blob, size);
+ cv::GComputation pipeline(cv::GIn(bgr), cv::GOut(objs));
+ #+END_SRC
+ #+LaTeX: }
+- Starting with OpenCV 4.5 (2020), G-API will provide more streaming-
+ and NN-oriented operations out of the box.
+
+** Inference with G-API
+
+*** What is the difference?
+
+- ~ObjDetect~ is not an operation, ~cv::gapi::infer<T>~ is;
+- ~cv::gapi::infer<T>~ is a *generic* operation, where ~T=ObjDetect~ describes
+ the calling convention:
+ - How many inputs the network consumes,
+ - How many outputs the network produces.
+- Inference data types are ~GMat~ only:
+ - Representing an image, then preprocessed automatically;
+ - Representing a blob (n-dimensional ~Mat~), then passed as-is.
+- Inference *backends* only need to implement a single generic operation ~infer~.
+
+** Inference with G-API
+
+*** But how does it run?
+
+- Since ~infer~ is an *Operation*, backends may provide *Kernels* implementing it;
+- The only publicly available inference backend now is *OpenVINO™*:
+ - Brings its ~infer~ kernel atop of the Inference Engine;
+- NN model data is passed through G-API compile arguments (like kernels);
+- Every NN backend provides its own structure to configure the network (like
+ a kernel API).
+
+** Inference with G-API
+
+*** Passing OpenVINO™ parameters to G-API
+
+- ~ObjDetect~ example:
+ #+LaTeX: {\footnotesize
+ #+BEGIN_SRC C++
+ auto face_net = cv::gapi::ie::Params<ObjDetect> {
+ face_xml_path, // path to the topology IR
+ face_bin_path, // path to the topology weights
+ face_device_string, // OpenVINO plugin (device) string
+ };
+ auto networks = cv::gapi::networks(face_net);
+ pipeline.compile(.., cv::compile_args(..., networks));
+ #+END_SRC
+ #+LaTeX: }
+- ~AgeGender~ requires binding Op's outputs to NN layers:
+ #+LaTeX: {\footnotesize
+ #+BEGIN_SRC C++
+ auto age_net = cv::gapi::ie::Params<AgeGender> {
+ ...
+ }.cfgOutputLayers({"age_conv3", "prob"}); // array<string,2> !
+ #+END_SRC
+ #+LaTeX: }
+
+** Streaming with G-API
+
+#+BEGIN_SRC dot :file 020-fd-demo.eps :cmdline "-Kdot -Teps"
+digraph {
+ rankdir=LR;
+ node [shape=box];
+
+ cap [label=Capture];
+ dec [label=Decode];
+ res [label=Resize];
+ cnn [label=Infer];
+ vis [label=Visualize];
+
+ cap -> dec;
+ dec -> res;
+ res -> cnn;
+ cnn -> vis;
+}
+#+END_SRC
+Anatomy of a regular video analytics application
+
+** Streaming with G-API
+
+#+BEGIN_SRC dot :file 021-fd-serial.eps :cmdline "-Kdot -Teps"
+digraph {
+ node [shape=box margin=0 width=0.3 height=0.4]
+ nodesep=0.2;
+ rankdir=LR;
+
+ subgraph cluster0 {
+ colorscheme=blues9
+ pp [label="..." shape=plaintext];
+ v0 [label=V];
+ label="Frame N-1";
+ color=7;
+ }
+
+ subgraph cluster1 {
+ colorscheme=blues9
+ c1 [label=C];
+ d1 [label=D];
+ r1 [label=R];
+ i1 [label=I];
+ v1 [label=V];
+ label="Frame N";
+ color=6;
+ }
+
+ subgraph cluster2 {
+ colorscheme=blues9
+ c2 [label=C];
+ nn [label="..." shape=plaintext];
+ label="Frame N+1";
+ color=5;
+ }
+
+ c1 -> d1 -> r1 -> i1 -> v1;
+
+ pp-> v0;
+ v0 -> c1 [style=invis];
+ v1 -> c2 [style=invis];
+ c2 -> nn;
+}
+#+END_SRC
+Serial execution of the sample video analytics application
+
+** Streaming with G-API
+ :PROPERTIES:
+ :BEAMER_opt: shrink
+ :END:
+
+#+BEGIN_SRC dot :file 022-fd-pipelined.eps :cmdline "-Kdot -Teps"
+digraph {
+ nodesep=0.2;
+ ranksep=0.2;
+ node [margin=0 width=0.4 height=0.2];
+ node [shape=plaintext]
+ Camera [label="Camera:"];
+ GPU [label="GPU:"];
+ FPGA [label="FPGA:"];
+ CPU [label="CPU:"];
+ Time [label="Time:"];
+ t6 [label="T6"];
+ t7 [label="T7"];
+ t8 [label="T8"];
+ t9 [label="T9"];
+ t10 [label="T10"];
+ tnn [label="..."];
+
+ node [shape=box margin=0 width=0.4 height=0.4 colorscheme=blues9]
+ node [color=9] V3;
+ node [color=8] F4; V4;
+ node [color=7] DR5; F5; V5;
+ node [color=6] C6; DR6; F6; V6;
+ node [color=5] C7; DR7; F7; V7;
+ node [color=4] C8; DR8; F8;
+ node [color=3] C9; DR9;
+ node [color=2] C10;
+
+ {rank=same; rankdir=LR; Camera C6 C7 C8 C9 C10}
+ Camera -> C6 -> C7 -> C8 -> C9 -> C10 [style=invis];
+
+ {rank=same; rankdir=LR; GPU DR5 DR6 DR7 DR8 DR9}
+ GPU -> DR5 -> DR6 -> DR7 -> DR8 -> DR9 [style=invis];
+
+ C6 -> DR5 [style=invis];
+ C6 -> DR6 [constraint=false];
+ C7 -> DR7 [constraint=false];
+ C8 -> DR8 [constraint=false];
+ C9 -> DR9 [constraint=false];
+
+ {rank=same; rankdir=LR; FPGA F4 F5 F6 F7 F8}
+ FPGA -> F4 -> F5 -> F6 -> F7 -> F8 [style=invis];
+
+ DR5 -> F4 [style=invis];
+ DR5 -> F5 [constraint=false];
+ DR6 -> F6 [constraint=false];
+ DR7 -> F7 [constraint=false];
+ DR8 -> F8 [constraint=false];
+
+ {rank=same; rankdir=LR; CPU V3 V4 V5 V6 V7}
+ CPU -> V3 -> V4 -> V5 -> V6 -> V7 [style=invis];
+
+ F4 -> V3 [style=invis];
+ F4 -> V4 [constraint=false];
+ F5 -> V5 [constraint=false];
+ F6 -> V6 [constraint=false];
+ F7 -> V7 [constraint=false];
+
+ {rank=same; rankdir=LR; Time t6 t7 t8 t9 t10 tnn}
+ Time -> t6 -> t7 -> t8 -> t9 -> t10 -> tnn [style=invis];
+
+ CPU -> Time [style=invis];
+ V3 -> t6 [style=invis];
+ V4 -> t7 [style=invis];
+ V5 -> t8 [style=invis];
+ V6 -> t9 [style=invis];
+ V7 -> t10 [style=invis];
+}
+#+END_SRC
+Pipelined execution for the video analytics application
+
+** Streaming with G-API: Example
+
+**** Serial mode (4.0) :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_col: 0.45
+ :END:
+#+LaTeX: {\tiny
+#+BEGIN_SRC C++
+pipeline = cv::GComputation(...);
+
+cv::VideoCapture cap(input);
+cv::Mat in_frame;
+std::vector<cv::Rect> out_faces;
+
+while (cap.read(in_frame)) {
+ pipeline.apply(cv::gin(in_frame),
+ cv::gout(out_faces),
+ cv::compile_args(kernels,
+ networks));
+ // Process results
+ ...
+}
+#+END_SRC
+#+LaTeX: }
+
+**** Streaming mode (since 4.2) :B_block:BMCOL:
+ :PROPERTIES:
+ :BEAMER_env: block
+ :BEAMER_col: 0.45
+ :END:
+#+LaTeX: {\tiny
+#+BEGIN_SRC C++
+pipeline = cv::GComputation(...);
+
+auto in_src = cv::gapi::wip::make_src
+ <cv::gapi::wip::GCaptureSource>(input)
+auto cc = pipeline.compileStreaming
+ (cv::compile_args(kernels, networks))
+cc.setSource(cv::gin(in_src));
+cc.start();
+
+std::vector<cv::Rect> out_faces;
+while (cc.pull(cv::gout(out_faces))) {
+ // Process results
+ ...
+}
+#+END_SRC
+#+LaTeX: }
+
+**** More information
+
+#+LaTeX: {\footnotesize
+https://opencv.org/hybrid-cv-dl-pipelines-with-opencv-4-4-g-api/
+#+LaTeX: }
+
+* Latest features
+** Latest features
+*** Python API
+
+- Initial Python3 binding is available now in ~master~ (future 4.5);
+- Only basic CV functionality is supported (~core~ & ~imgproc~ namespaces,
+ selecting backends);
+- Adding more programmability, inference, and streaming is next.
+
+** Latest features
+*** Python API
+
+#+LaTeX: {\footnotesize
+#+BEGIN_SRC Python
+import numpy as np
+import cv2 as cv
+
+sz = (1280, 720)
+in1 = np.random.randint(0, 100, sz).astype(np.uint8)
+in2 = np.random.randint(0, 100, sz).astype(np.uint8)
+
+g_in1 = cv.GMat()
+g_in2 = cv.GMat()
+g_out = cv.gapi.add(g_in1, g_in2)
+gr = cv.GComputation(g_in1, g_in2, g_out)
+
+pkg = cv.gapi.core.fluid.kernels()
+out = gr.apply(in1, in2, args=cv.compile_args(pkg))
+#+END_SRC
+#+LaTeX: }
+
* Understanding the "G-Effect"
** Understanding the "G-Effect"
* Resources on G-API
** Resources on G-API
-
+ :PROPERTIES:
+ :BEAMER_opt: shrink
+ :END:
*** Repository
- https://github.com/opencv/opencv (see ~modules/gapi~)
-- Integral part of OpenCV starting version 4.0;
+
+*** Article
+
+- https://opencv.org/hybrid-cv-dl-pipelines-with-opencv-4-4-g-api/
*** Documentation
-- https://docs.opencv.org/master/d0/d1e/gapi.html
-- A tutorial and a class reference are there as well.
+- https://docs.opencv.org/4.4.0/d0/d1e/gapi.html
+
+*** Tutorials
+- https://docs.opencv.org/4.4.0/df/d7e/tutorial_table_of_content_gapi.html
* Thank you!
GSCALAR,
GARRAY,
GOPAQUE,
+ GFRAME,
};
struct GCompileArg;
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2020 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GFRAME_HPP
+#define OPENCV_GAPI_GFRAME_HPP
+
+#include <ostream>
+#include <memory> // std::shared_ptr
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/gcommon.hpp> // GShape
+
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+
+// TODO GAPI_EXPORTS or so
+namespace cv
+{
+// Forward declaration; GNode and GOrigin are an internal
+// (user-inaccessible) classes.
+class GNode;
+struct GOrigin;
+
+/** \addtogroup gapi_data_objects
+ * @{
+ */
+class GAPI_EXPORTS_W_SIMPLE GFrame
+{
+public:
+ GAPI_WRAP GFrame(); // Empty constructor
+ GFrame(const GNode &n, std::size_t out); // Operation result constructor
+
+ GOrigin& priv(); // Internal use only
+ const GOrigin& priv() const; // Internal use only
+
+private:
+ std::shared_ptr<GOrigin> m_priv;
+};
+/** @} */
+
+/**
+ * \addtogroup gapi_meta_args
+ * @{
+ */
+struct GAPI_EXPORTS GFrameDesc
+{
+};
+static inline GFrameDesc empty_gframe_desc() { return GFrameDesc{}; }
+/** @} */
+
+GAPI_EXPORTS std::ostream& operator<<(std::ostream& os, const cv::GFrameDesc &desc);
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GFRAME_HPP
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2020 Intel Corporation
#ifndef OPENCV_GAPI_GMAT_HPP
using GMat::GMat;
};
-class GAPI_EXPORTS GFrame : public GMat
-{
-public:
- using GMat::GMat;
-};
-
/** @} */
/**
#include <opencv2/gapi/gscalar.hpp>
#include <opencv2/gapi/garray.hpp>
#include <opencv2/gapi/gopaque.hpp>
+#include <opencv2/gapi/gframe.hpp>
#include <opencv2/gapi/streaming/source.hpp>
#include <opencv2/gapi/gcommon.hpp>
--- /dev/null
+#include <opencv2/gapi.hpp> // G-API framework header
+#include <opencv2/gapi/imgproc.hpp> // cv::gapi::blur()
+#include <opencv2/highgui.hpp> // cv::imread/imwrite
+
+int main(int argc, char *argv[]) {
+ if (argc < 3) return 1;
+
+ cv::GMat in; // Express the graph:
+ cv::GMat out = cv::gapi::blur(in, cv::Size(3,3)); // `out` is a result of `blur` of `in`
+
+ cv::Mat in_mat = cv::imread(argv[1]); // Get the real data
+ cv::Mat out_mat; // Output buffer (may be empty)
+
+ cv::GComputation(cv::GIn(in), cv::GOut(out)) // Declare a graph from `in` to `out`
+ .apply(cv::gin(in_mat), cv::gout(out_mat)); // ...and run it immediately
+
+ cv::imwrite(argv[2], out_mat); // Save the result
+ return 0;
+}
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2020 Intel Corporation
+
+
+#include "precomp.hpp"
+
+#include <opencv2/gapi/gframe.hpp>
+
+#include "api/gorigin.hpp"
+
+// cv::GFrame public implementation //////////////////////////////////////////////
+cv::GFrame::GFrame()
+ : m_priv(new GOrigin(GShape::GMAT, GNode::Param())) {
+ // N.B.: The shape here is still GMAT as currently cv::Mat is used
+ // as an underlying host type. Will be changed to GFRAME once
+ // GExecutor & GStreamingExecutor & selected backends will be extended
+ // to support cv::MediaFrame.
+}
+
+cv::GFrame::GFrame(const GNode &n, std::size_t out)
+ : m_priv(new GOrigin(GShape::GMAT, n, out)) {
+ // N.B.: GMAT is here for the same reason as above ^
+}
+
+cv::GOrigin& cv::GFrame::priv() {
+ return *m_priv;
+}
+
+const cv::GOrigin& cv::GFrame::priv() const {
+ return *m_priv;
+}
+
+namespace cv {
+std::ostream& operator<<(std::ostream& os, const cv::GFrameDesc &) {
+ return os;
+}
+
+} // namespace cv
case GShape::GSCALAR: m_res.slot<cv::Scalar>()[rc.id] = util::get<cv::Scalar>(arg); break;
case GShape::GARRAY: m_res.slot<cv::detail::VectorRef>()[rc.id] = util::get<cv::detail::VectorRef>(arg); break;
case GShape::GOPAQUE: m_res.slot<cv::detail::OpaqueRef>()[rc.id] = util::get<cv::detail::OpaqueRef>(arg); break;
+ default: util::throw_error(std::logic_error("Unsupported input GShape type"));
}
}
cv::Size(128, 128)),
Values(-1, CV_8U, CV_32F),
Values(CORE_FLUID),
- Values(ADD, SUB, DIV, MUL),
+ Values(DIV, MUL),
testing::Bool(),
Values(1.0),
testing::Bool()));
-INSTANTIATE_TEST_CASE_P(SubTestFluid, MathOpTest,
+// FIXME: Accuracy test for SUB math operation fails on FullHD and HD CV_16SC1 input cv::Mat,
+// double-precision input cv::Scalar and CV_32FC1 output cv::Mat on Mac.
+// Accuracy test for ADD math operation fails on HD CV_16SC1 input cv::Mat,
+// double-precision input cv::Scalar and CV_32FC1 output cv::Mat on Mac.
+// As failures are sporadic, disabling all instantiation cases for SUB and ADD.
+// Github ticket: https://github.com/opencv/opencv/issues/18373.
+INSTANTIATE_TEST_CASE_P(DISABLED_MathOpTestFluid, MathOpTest,
+ Combine(Values(CV_8UC3, CV_8UC1, CV_16SC1, CV_32FC1),
+ Values(cv::Size(1920, 1080),
+ cv::Size(1280, 720),
+ cv::Size(640, 480),
+ cv::Size(128, 128)),
+ Values(-1, CV_8U, CV_32F),
+ Values(CORE_FLUID),
+ Values(ADD, SUB),
+ testing::Bool(),
+ Values(1.0),
+ testing::Bool()));
+
+// FIXME: Accuracy test for SUB math operation fails on CV_16SC1 input cv::Mat, double-precision
+// input cv::Scalar and CV_32FC1 output cv::Mat on Mac.
+// As failures are sporadic, disabling all instantiation cases for SUB operation.
+// Github ticket: https://github.com/opencv/opencv/issues/18373.
+INSTANTIATE_TEST_CASE_P(DISABLED_SubTestFluid, MathOpTest,
Combine(Values(CV_8UC1, CV_16SC1 , CV_32FC1),
Values(cv::Size(1280, 720),
cv::Size(640, 480),
#include <opencv2/core.hpp>
#include <opencv2/core/cvstd.hpp>
+#ifdef HAVE_OPENCV_VIDEO
#include <opencv2/video.hpp>
+#endif
namespace opencv_test
{
static GMatDesc outMeta(GMatDesc in) { return in.withType(CV_8U, 1); }
};
-
+#ifdef HAVE_OPENCV_VIDEO
GAPI_OCV_KERNEL_ST(GOCVBackSub, GBackSub, cv::BackgroundSubtractor)
{
static void setup(const cv::GMatDesc &/* desc */,
state.apply(in, out, -1);
}
};
+#endif
};
TEST(StatefulKernel, StateIsMutableInRuntime)
EXPECT_THROW(comp.apply(in_mat, out_mat, cv::compile_args(pkg)), std::logic_error);
}
+#ifdef HAVE_OPENCV_VIDEO
namespace
{
void compareBackSubResults(const cv::Mat &actual, const cv::Mat &expected,
pOcvBackSub->apply(frame, ocvForeground);
compareBackSubResults(gapiForeground, ocvForeground, 1);
}
+#endif
+#ifdef HAVE_OPENCV_VIDEO
namespace
{
void testBackSubInStreaming(cv::GStreamingCompiled gapiBackSub, const int diffPercent)
// Allowing 5% difference of all pixels between G-API and reference OpenCV results
testBackSubInStreaming(gapiBackSub, 5);
}
+#endif
//-------------------------------------------------------------------------------------------------------------
}
};
-GAPI_OCV_KERNEL(OCVBlurFrame, GBlurFrame)
-{
+GAPI_OCV_KERNEL(OCVBlurFrame, GBlurFrame) {
static void run(const cv::Mat& in, cv::Mat& out) {
cv::blur(in, out, cv::Size{3,3});
}
};
-struct GFrameTest : public ::testing::Test
-{
+struct GFrameTest : public ::testing::Test {
cv::Size sz{32,32};
cv::Mat in_mat;
cv::Mat out_mat;
GFrameTest()
: in_mat(cv::Mat(sz, CV_8UC1))
, out_mat(cv::Mat::zeros(sz, CV_8UC1))
- , out_mat_ocv(cv::Mat::zeros(sz, CV_8UC1))
- {
+ , out_mat_ocv(cv::Mat::zeros(sz, CV_8UC1)) {
cv::randn(in_mat, cv::Scalar::all(127.0f), cv::Scalar::all(40.f));
cv::blur(in_mat, out_mat_ocv, cv::Size{3,3});
}
- void check()
- {
+ void check() {
EXPECT_EQ(0, cvtest::norm(out_mat, out_mat_ocv, NORM_INF));
}
};
-TEST_F(GFrameTest, Input)
-{
+TEST_F(GFrameTest, Input) {
cv::GFrame in;
auto out = GBlurFrame::on(in);
cv::GComputation c(cv::GIn(in), cv::GOut(out));
Values (1.0),
testing::Bool()));
-INSTANTIATE_TEST_CASE_P(DivTestGPU, MathOpTest,
+// FIXME: Accuracy test for DIV math operation fails on CV_8UC3 HD input cv::Mat, double-presicion
+// input cv::Scalar and CV_16U output cv::Mat when we also test reverse operation on Mac.
+// Accuracy test for DIV math operation fails on CV_8UC3 VGA input cv::Mat, double-presicion
+// input cv::Scalar and output cv::Mat having the SAME depth as input one when we also test
+// reverse operation on Mac.
+// It is oddly, but test doesn't fail if we have VGA CV_8UC3 input cv::Mat, double-precision
+// input cv::Scalar and output cv::Mat having explicitly specified CV_8U depth when we also
+// test reverse operation on Mac.
+// As failures are sporadic, disabling all instantiation cases for DIV operation.
+// Github ticket: https://github.com/opencv/opencv/issues/18373.
+INSTANTIATE_TEST_CASE_P(DISABLED_DivTestGPU, MathOpTest,
Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
Values(cv::Size(1280, 720),
cv::Size(640, 480),
#endif // WINRT
}
+#if INF_ENGINE_RELEASE >= 2020010000
+static const std::string SUBDIR = "intel/age-gender-recognition-retail-0013/FP32/";
+#else
+static const std::string SUBDIR = "Retail/object_attributes/age_gender/dldt/";
+#endif
+
// FIXME: taken from the DNN module
void normAssert(cv::InputArray ref, cv::InputArray test,
const char *comment /*= ""*/,
EXPECT_LE(normInf, lInf) << comment;
}
-std::vector<std::string> modelPathByName(const std::string &model_name) {
- // Handle OMZ model layout changes among OpenVINO versions here
- static const std::unordered_multimap<std::string, std::string> map = {
-#if INF_ENGINE_RELEASE >= 2019040000 // >= 2019.R4
- {"age-gender-recognition-retail-0013",
- "2020.3.0/intel/age-gender-recognition-retail-0013/FP32"},
-#endif // INF_ENGINE_RELEASE >= 2019040000
- {"age-gender-recognition-retail-0013",
- "Retail/object_attributes/age_gender/dldt"},
- };
- const auto range = map.equal_range(model_name);
- std::vector<std::string> result;
- for (auto it = range.first; it != range.second; ++it) {
- result.emplace_back(it->second);
- }
- return result;
-}
-
-std::tuple<std::string, std::string> findModel(const std::string &model_name) {
- const auto candidates = modelPathByName(model_name);
- CV_Assert(!candidates.empty() && "No model path candidates found at all");
-
- for (auto &&path : candidates) {
- std::string model_xml, model_bin;
- try {
- model_xml = findDataFile(path + "/" + model_name + ".xml", false);
- model_bin = findDataFile(path + "/" + model_name + ".bin", false);
- // Return the first file which actually works
- return std::make_tuple(model_xml, model_bin);
- } catch (SkipTestException&) {
- // This is quite ugly but it is a way for OpenCV to let us know
- // this file wasn't found.
- continue;
- }
- }
-
- // Default behavior if reached here.
- throw SkipTestException("Files for " + model_name + " were not found");
-}
-
namespace IE = InferenceEngine;
void setNetParameters(IE::CNNNetwork& net) {
initDLDTDataPath();
cv::gapi::ie::detail::ParamDesc params;
- std::tie(params.model_path, params.weights_path) = findModel("age-gender-recognition-retail-0013");
+ params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+ params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
params.device_id = "CPU";
// Load IE network, initialize input data using that.
initDLDTDataPath();
cv::gapi::ie::detail::ParamDesc params;
- std::tie(params.model_path, params.weights_path) = findModel("age-gender-recognition-retail-0013");
+ params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+ params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
params.device_id = "CPU";
// FIXME: Ideally it should be an image from disk
using AGInfo = std::tuple<cv::GMat, cv::GMat>;
G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "test-age-gender");
- ROIList() {
+ void SetUp() {
initDLDTDataPath();
- std::tie(params.model_path, params.weights_path) = findModel("age-gender-recognition-retail-0013");
+ params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+ params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
params.device_id = "CPU";
// FIXME: it must be cv::imread(findDataFile("../dnn/grace_hopper_227.png", false));
initDLDTDataPath();
cv::gapi::ie::detail::ParamDesc AGparams;
- std::tie(AGparams.model_path, AGparams.weights_path) = findModel("age-gender-recognition-retail-0013");
+ AGparams.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false);
+ AGparams.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false);
AGparams.device_id = "MYRIAD";
// FIXME: Ideally it should be an image from disk
AND NOT PYTHON3_NUMPY_VERSION VERSION_LESS "1.15"
AND PYTHON3_NUMPY_VERSION VERSION_LESS "1.17"
)
+ message(WARNING "Current NUMPY version (${PYTHON3_NUMPY_VERSION}) is not compatible with LIMITED_API.")
set(PYTHON3_LIMITED_API OFF)
endif()
if (!item)
return;
- PyTuple_SET_ITEM(py_tuple, I, item);
+ PyTuple_SetItem(py_tuple, I, item);
convert_to_python_tuple<I + 1, Tp...>(cpp_tuple, py_tuple);
}
foreach(mod opencv_videoio opencv_core opencv_imgproc opencv_imgcodecs)
ocv_target_link_libraries(${name} LINK_PRIVATE ${mod})
- ocv_target_include_directories(${name} PRIVATE "${OPENCV_MODULE_${mod}_LOCATION}/include")
+ ocv_target_include_directories(${name} "${OPENCV_MODULE_${mod}_LOCATION}/include")
endforeach()
if(WIN32)
#ifndef __VIDEOIO_H_
#define __VIDEOIO_H_
+#if defined(__OPENCV_BUILD) && defined(BUILD_PLUGIN)
+#undef __OPENCV_BUILD // allow public API only
+#include <opencv2/core.hpp>
+#include <opencv2/core/utils/trace.hpp>
+#endif
+
#if defined __linux__ || defined __APPLE__ || defined __HAIKU__
#include <unistd.h> // -D_FORTIFY_SOURCE=2 workaround: https://github.com/opencv/opencv/issues/15020
#endif
remove(filename.c_str());
}
+inline static std::string videoio_mfx_name_printer(const testing::TestParamInfo<videoio_mfx::ParamType>& info)
+{
+ std::ostringstream out;
+ const Size sz = get<0>(info.param);
+ const std::string ext = get<2>(info.param);
+ out << sz.width << "x" << sz.height << "x" << get<1>(info.param) << "x" << ext.substr(1, ext.size() - 1);
+ return out.str();
+}
+
INSTANTIATE_TEST_CASE_P(videoio, videoio_mfx,
testing::Combine(
testing::Values(Size(640, 480), Size(638, 478), Size(636, 476), Size(1920, 1080)),
testing::Values(1, 30, 100),
- testing::Values(".mpeg2", ".264", ".265")));
+ testing::Values(".mpeg2", ".264", ".265")),
+ videoio_mfx_name_printer);
}} // namespace
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
+#include <sstream>
+
#include "opencv2/ts.hpp"
#include "opencv2/videoio.hpp"
#include "opencv2/videoio/registry.hpp"
if(HAVE_OPENVX)
add_subdirectory(openvx)
endif()
-if(UNIX AND NOT ANDROID AND (HAVE_VA OR HAVE_VA_INTEL))
+if(UNIX AND NOT ANDROID AND HAVE_VA)
add_subdirectory(va_intel)
endif()
if(ANDROID AND (BUILD_ANDROID_EXAMPLES OR INSTALL_ANDROID_EXAMPLES))
file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
foreach(sample_filename ${all_samples})
ocv_define_sample(tgt ${sample_filename} va_intel)
- ocv_target_link_libraries(${tgt} PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_VA_INTEL_SAMPLES_REQUIRED_DEPS} ${VA_LIBRARIES} ${VA_INTEL_LIBRARIES})
+ ocv_target_link_libraries(${tgt} PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_VA_INTEL_SAMPLES_REQUIRED_DEPS} ${VA_LIBRARIES})
endforeach()