GAPI_EXPORTS cv::gapi::GBackend backend();
+/**
+ * Specify how G-API and IE should treat input data
+ *
+ * In OpenCV, the same cv::Mat is used to represent both
+ * image and tensor data. Sometimes those are hardly distinguishable,
+ * so this extra parameter is used to give G-API a hint.
+ *
+ * This hint controls how G-API reinterprets the data when converting
+ * it to IE Blob format (and which layout/etc is assigned to this data).
+ */
+enum class TraitAs: int
+{
+ TENSOR, //!< G-API treats an associated cv::Mat as a raw tensor and passes dimensions as-is
+ IMAGE //!< G-API treats an associated cv::Mat as an image, so it creates an "image" blob (NCHW/NHWC, etc)
+};
+
namespace detail {
struct ParamDesc {
std::string model_path;
std::vector<std::string> input_names;
std::vector<std::string> output_names;
- std::unordered_map<std::string, cv::Mat> const_inputs;
+ using ConstInput = std::pair<cv::Mat, TraitAs>;
+ std::unordered_map<std::string, ConstInput> const_inputs;
 // NB: num_* may differ from the topology's real input/output port numbers
// (e.g. topology's partial execution)
}
Params<Net>& constInput(const std::string &layer_name,
- const cv::Mat &data) {
- desc.const_inputs[layer_name] = data;
+ const cv::Mat &data,
+ TraitAs hint = TraitAs::TENSOR) {
+ desc.const_inputs[layer_name] = {data, hint};
return *this;
}
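
For illustration, here is a minimal usage sketch of the new hint parameter
(the nets::Detector type, the model_xml/model_bin/device variables and the
"im_info" layer name are hypothetical, not part of this patch):

    cv::Mat im_info = (cv::Mat_<float>(1, 3) << 480.f, 640.f, 1.f);
    auto net = cv::gapi::ie::Params<nets::Detector> {
        model_xml, model_bin, device
    }.constInput("im_info", im_info, cv::gapi::ie::TraitAs::TENSOR);

With TraitAs::TENSOR (the default), the Mat's dimensions are passed to IE
as-is; TraitAs::IMAGE would make G-API build an NHWC "image" blob instead.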
#ifdef HAVE_INF_ENGINE
-#if INF_ENGINE_RELEASE <= 2018050000
+#if INF_ENGINE_RELEASE < 2019010000
# error G-API IE module supports only OpenVINO IE >= 2019 R1
#endif
#include <ade/util/chain_range.hpp>
#include <ade/typed_graph.hpp>
+#include <opencv2/core/utility.hpp>
+#include <opencv2/core/utils/logger.hpp>
+
#include <opencv2/gapi/gcommon.hpp>
#include <opencv2/gapi/garray.hpp>
#include <opencv2/gapi/util/any.hpp>
#include <opencv2/gapi/gtype_traits.hpp>
-
#include <opencv2/gapi/infer.hpp>
#include "compiler/gobjref.hpp"
return result;
}
+inline IE::Layout toIELayout(const std::size_t ndims) {
+ static const IE::Layout lts[] = {
+ IE::Layout::SCALAR,
+ IE::Layout::C,
+ IE::Layout::NC,
+ IE::Layout::CHW,
+ IE::Layout::NCHW,
+ IE::Layout::NCDHW,
+ };
+ // FIXME: This is not really a good conversion,
+ // since it may also stand for NHWC/HW/CN/NDHWC data
+ CV_Assert(ndims < sizeof(lts) / sizeof(lts[0]));
+ return lts[ndims];
+}
+
inline IE::Precision toIE(int depth) {
switch (depth) {
case CV_8U: return IE::Precision::U8;
return -1;
}
-inline IE::TensorDesc toIE(const cv::Mat &mat) {
+inline IE::TensorDesc toIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) {
const auto &sz = mat.size;
 // NB: An RGB image is represented as a 2D cv::Mat here
 // (since the channel component is not counted as a dimension).
- if (sz.dims() == 2) {
+ // Note: regular 2D matrices also fall into this category, hence the hint
+ if (sz.dims() == 2 && hint == cv::gapi::ie::TraitAs::IMAGE)
+ {
// NB: This logic is mainly taken from IE samples
+ const size_t pixsz = CV_ELEM_SIZE1(mat.type());
const size_t channels = mat.channels();
const size_t height = mat.size().height;
const size_t width = mat.size().width;
const size_t strideW = mat.step.buf[1];
const bool is_dense =
- strideW == channels &&
- strideH == channels * width;
+ strideW == pixsz * channels &&
+ strideH == strideW * width;
if (!is_dense)
cv::util::throw_error(std::logic_error("Doesn't support conversion"
IE::Layout::NHWC);
}
- GAPI_Assert(sz.dims() == 4); // NB: Will relax when needed (to known use)
- return IE::TensorDesc(toIE(mat.depth()), toIE(sz), IE::Layout::NCHW);
+ return IE::TensorDesc(toIE(mat.depth()), toIE(sz), toIELayout(sz.dims()));
}
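
A quick illustration of the hint's effect (a sketch; the IMAGE branch is
partially elided in this diff and is assumed to build a {1,C,H,W} descriptor):

    // 480x640 CV_8UC3 Mat + TraitAs::IMAGE  -> U8 blob, dims {1,3,480,640},
    //                                          IE::Layout::NHWC
    // 480x640 CV_8UC3 Mat + TraitAs::TENSOR -> dimensions taken from the Mat
    //                                          as-is, layout from toIELayout()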
-inline IE::Blob::Ptr wrapIE(const cv::Mat &mat) {
- const auto tDesc = toIE(mat);
+inline IE::Blob::Ptr wrapIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) {
+ const auto tDesc = toIE(mat, hint);
switch (mat.depth()) {
// NB: Seems there's no way to create an untyped (T-less) Blob::Ptr
// in IE given only precision via TensorDesc. So we have to do this:
}
// This method is [supposed to be] called at Island compilation stage
+ // TODO: Move to a new OpenVINO Core API!
cv::gimpl::ie::IECompiled compile() const {
auto this_plugin = IE::PluginDispatcher().getPluginByDevice(params.device_id);
+
+ // Load extensions (taken from DNN module)
+ if (params.device_id == "CPU" || params.device_id == "FPGA")
+ {
+ const std::string suffixes[] = { "_avx2", "_sse4", ""};
+ const bool haveFeature[] = {
+ cv::checkHardwareSupport(CPU_AVX2),
+ cv::checkHardwareSupport(CPU_SSE4_2),
+ true
+ };
+ std::vector<std::string> candidates;
+ for (auto &&it : ade::util::zip(ade::util::toRange(suffixes),
+ ade::util::toRange(haveFeature)))
+ {
+ std::string suffix;
+ bool available = false;
+ std::tie(suffix, available) = it;
+ if (!available) continue;
+#ifdef _WIN32
+ candidates.push_back("cpu_extension" + suffix + ".dll");
+#elif defined(__APPLE__)
+ candidates.push_back("libcpu_extension" + suffix + ".so"); // built as loadable module
+ candidates.push_back("libcpu_extension" + suffix + ".dylib"); // built as shared library
+#else
+ candidates.push_back("libcpu_extension" + suffix + ".so");
+#endif // _WIN32
+ }
+ for (auto &&extlib : candidates)
+ {
+ try
+ {
+ this_plugin.AddExtension(IE::make_so_pointer<IE::IExtension>(extlib));
+ CV_LOG_INFO(NULL, "DNN-IE: Loaded extension plugin: " << extlib);
+ break;
+ }
+ catch(...)
+ {
+ CV_LOG_WARNING(NULL, "Failed to load IE extension " << extlib);
+ }
+ }
+ }
+
auto this_network = this_plugin.LoadNetwork(net, {}); // FIXME: 2nd parameter to be
// configurable via the API
auto this_request = this_network.CreateInferRequest();
// Bind const data to infer request
for (auto &&p : params.const_inputs) {
- this_request.SetBlob(p.first, wrapIE(p.second));
+ // FIXME: SetBlob is known to be inefficient;
+ // it is worth making a customizable "initializer" and passing the
+ // cv::Mat-wrapped blob there to support IE's optimal "GetBlob idiom".
+ // Still, constant data is set only once.
+ this_request.SetBlob(p.first, wrapIE(p.second.first, p.second.second));
}
return {this_plugin, this_network, this_request};
// (A memory dialog comes to the picture again)
const cv::Mat this_mat = to_ocv(ctx.inMat(i));
- IE::Blob::Ptr this_blob = wrapIE(this_mat);
+ // FIXME: By default we treat our inputs as images here.
+ // Maybe we need to be more intelligent about this.
+ IE::Blob::Ptr this_blob = wrapIE(this_mat, cv::gapi::ie::TraitAs::IMAGE);
iec.this_request.SetBlob(uu.params.input_names[i], this_blob);
}
iec.this_request.Infer();
const auto& in_roi_vec = ctx.inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
const cv::Mat this_mat = to_ocv(ctx.inMat(1u));
- IE::Blob::Ptr this_blob = wrapIE(this_mat);
+ // Since we do ROI-list inference, always assume our input buffer is an image
+ IE::Blob::Ptr this_blob = wrapIE(this_mat, cv::gapi::ie::TraitAs::IMAGE);
// FIXME: This could be done ONCE at graph compile stage!
std::vector< std::vector<int> > cached_dims(uu.params.num_out);
}
InferenceEngine::Blob::Ptr cv::gapi::ie::util::to_ie(cv::Mat &blob) {
- return wrapIE(blob);
+ return wrapIE(blob, cv::gapi::ie::TraitAs::IMAGE);
}
-#else
+#else // HAVE_INF_ENGINE
+
cv::gapi::GBackend cv::gapi::ie::backend() {
// Still provide this symbol to avoid linking issues
util::throw_error(std::runtime_error("G-API has been compiled without OpenVINO IE support"));
--- /dev/null
+#include "opencv2/opencv_modules.hpp"
+#if defined(HAVE_OPENCV_GAPI)
+
+#include <chrono>
+#include <iomanip>
+
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+
+#include "opencv2/gapi.hpp"
+#include "opencv2/gapi/core.hpp"
+#include "opencv2/gapi/imgproc.hpp"
+#include "opencv2/gapi/infer.hpp"
+#include "opencv2/gapi/infer/ie.hpp"
+#include "opencv2/gapi/cpu/gcpukernel.hpp"
+#include "opencv2/gapi/streaming/cap.hpp"
+
+namespace {
+const std::string about =
+ "This is an OpenCV-based version of Security Barrier Camera example";
+const std::string keys =
+ "{ h help | | print this help message }"
+ "{ input | | Path to an input video file }"
+ "{ fdm | | IE face detection model IR }"
+ "{ fdw | | IE face detection model weights }"
+ "{ fdd | | IE face detection device }"
+ "{ agem | | IE age/gender recognition model IR }"
+ "{ agew | | IE age/gender recognition model weights }"
+ "{ aged | | IE age/gender recognition model device }"
+ "{ emom | | IE emotions recognition model IR }"
+ "{ emow | | IE emotions recognition model weights }"
+ "{ emod | | IE emotions recognition model device }"
+ "{ pure | | When set, no output is displayed. Useful for benchmarking }";
+
+struct Avg {
+ struct Elapsed {
+ explicit Elapsed(double ms) : ss(ms/1000.), mm(static_cast<int>(ss)/60) {}
+ const double ss;
+ const int mm;
+ };
+
+ using MS = std::chrono::duration<double, std::ratio<1, 1000>>;
+ using TS = std::chrono::time_point<std::chrono::high_resolution_clock>;
+ TS started;
+
+ void start() { started = now(); }
+ TS now() const { return std::chrono::high_resolution_clock::now(); }
+ double tick() const { return std::chrono::duration_cast<MS>(now() - started).count(); }
+ Elapsed elapsed() const { return Elapsed{tick()}; }
+ double fps(std::size_t n) const { return static_cast<double>(n) / (tick() / 1000.); }
+};
+std::ostream& operator<<(std::ostream &os, const Avg::Elapsed &e) {
+ os << e.mm << ':' << (e.ss - 60*e.mm);
+ return os;
+}
+} // namespace
+
+namespace custom {
+// Describe networks we use in our program.
+// In G-API, topologies act like "operations". Here we define our
+// topologies as operations which have inputs and outputs.
+
+// Every network requires three parameters to define:
+// 1) Network's TYPE name - this TYPE is then used as a template
+// parameter to generic functions like cv::gapi::infer<>(),
+// and is used to define network's configuration (per-backend).
+// 2) Network's SIGNATURE - a std::function<>-like record which defines
+// the network's input and output parameters (its API).
+// 3) Network's IDENTIFIER - a string defining what the network is.
+// Must be unique within the pipeline.
+
+// Note: these definitions are neutral to _how_ the networks are
+// executed. The _how_ is defined at graph compilation stage (via parameters),
+// not on the graph construction stage.
+
+// Face detector: takes one Mat, returns another Mat
+G_API_NET(Faces, <cv::GMat(cv::GMat)>, "face-detector");
+
+// Age/Gender recognition - takes one Mat, returns two:
+// one for Age and one for Gender. In G-API, multiple-return-value operations
+// are defined using std::tuple<>.
+using AGInfo = std::tuple<cv::GMat, cv::GMat>;
+G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "age-gender-recognition");
+
+// Emotion recognition - takes one Mat, returns another.
+G_API_NET(Emotions, <cv::GMat(cv::GMat)>, "emotions-recognition");
+
+// SSD Post-processing function - this is not a network but a kernel.
+// The kernel body is declared separately, this is just an interface.
+// This operation takes two Mats (detections and the source image),
+// and returns a vector of ROI (filtered by a default threshold).
+// The threshold (or a class to select) could be made a parameter, but since
+// this kernel is custom, parametrizing it doesn't make much sense here.
+G_API_OP(PostProc, <cv::GArray<cv::Rect>(cv::GMat, cv::GMat)>, "custom.fd_postproc") {
+ static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GMatDesc &) {
+ // This function is required for G-API engine to figure out
+ // what the output format is, given the input parameters.
+ // Since the output is an array (with a specific type),
+ // there's nothing to describe.
+ return cv::empty_array_desc();
+ }
+};
+
+GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
+ static void run(const cv::Mat &in_ssd_result,
+ const cv::Mat &in_frame,
+ std::vector<cv::Rect> &out_faces) {
+ const int MAX_PROPOSALS = 200;
+ const int OBJECT_SIZE = 7;
+ const cv::Size upscale = in_frame.size();
+ const cv::Rect surface({0,0}, upscale);
+
+ out_faces.clear();
+
+ const float *data = in_ssd_result.ptr<float>();
+ for (int i = 0; i < MAX_PROPOSALS; i++) {
+ const float image_id = data[i * OBJECT_SIZE + 0]; // batch id
+ const float confidence = data[i * OBJECT_SIZE + 2];
+ const float rc_left = data[i * OBJECT_SIZE + 3];
+ const float rc_top = data[i * OBJECT_SIZE + 4];
+ const float rc_right = data[i * OBJECT_SIZE + 5];
+ const float rc_bottom = data[i * OBJECT_SIZE + 6];
+
+ if (image_id < 0.f) { // indicates end of detections
+ break;
+ }
+ if (confidence < 0.5f) { // FIXME: hard-coded threshold
+ continue;
+ }
+
+ cv::Rect rc;
+ rc.x = static_cast<int>(rc_left * upscale.width);
+ rc.y = static_cast<int>(rc_top * upscale.height);
+ rc.width = static_cast<int>(rc_right * upscale.width) - rc.x;
+ rc.height = static_cast<int>(rc_bottom * upscale.height) - rc.y;
+ out_faces.push_back(rc & surface);
+ }
+ }
+};
+} // namespace custom
+
+namespace labels {
+const std::string genders[] = {
+ "Female", "Male"
+};
+const std::string emotions[] = {
+ "neutral", "happy", "sad", "surprise", "anger"
+};
+namespace {
+void DrawResults(cv::Mat &frame,
+ const std::vector<cv::Rect> &faces,
+ const std::vector<cv::Mat> &out_ages,
+ const std::vector<cv::Mat> &out_genders,
+ const std::vector<cv::Mat> &out_emotions) {
+ CV_Assert(faces.size() == out_ages.size());
+ CV_Assert(faces.size() == out_genders.size());
+ CV_Assert(faces.size() == out_emotions.size());
+
+ for (auto it = faces.begin(); it != faces.end(); ++it) {
+ const auto idx = std::distance(faces.begin(), it);
+ const auto &rc = *it;
+
+ const float *ages_data = out_ages[idx].ptr<float>();
+ const float *genders_data = out_genders[idx].ptr<float>();
+ const float *emotions_data = out_emotions[idx].ptr<float>();
+ const auto gen_id = std::max_element(genders_data, genders_data + 2) - genders_data;
+ const auto emo_id = std::max_element(emotions_data, emotions_data + 5) - emotions_data;
+
+ std::stringstream ss;
+ ss << static_cast<int>(ages_data[0]*100)
+ << ' '
+ << genders[gen_id]
+ << ' '
+ << emotions[emo_id];
+
+ const int ATTRIB_OFFSET = 15;
+ cv::rectangle(frame, rc, {0, 255, 0}, 4);
+ cv::putText(frame, ss.str(),
+ cv::Point(rc.x, rc.y - ATTRIB_OFFSET),
+ cv::FONT_HERSHEY_COMPLEX_SMALL,
+ 1,
+ cv::Scalar(0, 0, 255));
+ }
+}
+
+void DrawFPS(cv::Mat &frame, std::size_t n, double fps) {
+ std::ostringstream out;
+ out << "FRAME " << n << ": "
+ << std::fixed << std::setprecision(2) << fps
+ << " FPS (AVG)";
+ cv::putText(frame, out.str(),
+ cv::Point(0, frame.rows),
+ cv::FONT_HERSHEY_SIMPLEX,
+ 1,
+ cv::Scalar(0, 255, 0),
+ 2);
+}
+} // anonymous namespace
+} // namespace labels
+
+int main(int argc, char *argv[])
+{
+ cv::CommandLineParser cmd(argc, argv, keys);
+ cmd.about(about);
+ if (cmd.has("help")) {
+ cmd.printMessage();
+ return 0;
+ }
+ const std::string input = cmd.get<std::string>("input");
+ const bool no_show = cmd.get<bool>("pure");
+
+ // Express our processing pipeline. Lambda-based constructor
+ // is used to keep all temporary objects in a dedicated scope.
+ cv::GComputation pp([]() {
+ // Declare an empty GMat - the beginning of the pipeline.
+ cv::GMat in;
+
+ // Run face detection on the input frame. Result is a single GMat,
+ // internally representing a 1x1x200x7 SSD output.
+ // This is a single-patch version of infer:
+ // - Inference is running on the whole input image;
+ // - Image is converted and resized to the network's expected format
+ // automatically.
+ cv::GMat detections = cv::gapi::infer<custom::Faces>(in);
+
+ // Parse SSD output to a list of ROI (rectangles) using
+ // a custom kernel. Note: parsing SSD output may become a "standard" kernel.
+ cv::GArray<cv::Rect> faces = custom::PostProc::on(detections, in);
+
+ // Now run Age/Gender model on every detected face. This model has two
+ // outputs (for age and gender respectively).
+ // A special ROI-list-oriented form of infer<>() is used here:
+ // - First input argument is the list of rectangles to process,
+ // - Second one is the image to take the ROIs from;
+ // - Crop/Resize/Layout conversion happens automatically for every image patch
+ // from the list
+ // - Inference results are also returned in form of list (GArray<>)
+ // - Since there are two outputs, infer<> returns two arrays (via std::tuple).
+ cv::GArray<cv::GMat> ages;
+ cv::GArray<cv::GMat> genders;
+ std::tie(ages, genders) = cv::gapi::infer<custom::AgeGender>(faces, in);
+
+ // Recognize emotions on every face.
+ // ROI-list-oriented infer<>() is used here as well.
+ // Since the custom::Emotions network produces a single output, only one
+ // GArray<> is returned here.
+ cv::GArray<cv::GMat> emotions = cv::gapi::infer<custom::Emotions>(faces, in);
+
+ // Return the decoded frame as a result as well.
+ // The input matrix can't be specified as an output, so use copy() here
+ // (this copy will be optimized out in the future).
+ cv::GMat frame = cv::gapi::copy(in);
+
+ // Now specify the computation's boundaries - our pipeline consumes
+ // one image and produces five outputs.
+ return cv::GComputation(cv::GIn(in),
+ cv::GOut(frame, faces, ages, genders, emotions));
+ });
+
+ // Note: it might be very useful to have dimensions loaded at this point!
+ // After our computation is defined, specify how it should be executed.
+ // Execution is defined by inference backends and kernel backends we use to
+ // compile the pipeline (it is a different step).
+
+ // Declare IE parameters for the FaceDetection network. Note that custom::Faces
+ // is the type name we specified in G_API_NET() earlier.
+ // cv::gapi::ie::Params<> is a generic configuration description which is
+ // specialized to every particular network we use.
+ //
+ // The OpenCV DNN backend will have its own parameter structure with settings
+ // relevant to OpenCV DNN module. Same applies to other possible inference
+ // backends, like cuDNN, etc (:-))
+ auto det_net = cv::gapi::ie::Params<custom::Faces> {
+ cmd.get<std::string>("fdm"), // read cmd args: path to topology IR
+ cmd.get<std::string>("fdw"), // read cmd args: path to weights
+ cmd.get<std::string>("fdd"), // read cmd args: device specifier
+ };
+
+ auto age_net = cv::gapi::ie::Params<custom::AgeGender> {
+ cmd.get<std::string>("agem"), // read cmd args: path to topology IR
+ cmd.get<std::string>("agew"), // read cmd args: path to weights
+ cmd.get<std::string>("aged"), // read cmd args: device specifier
+ }.cfgOutputLayers({ "age_conv3", "prob" });
+
+ auto emo_net = cv::gapi::ie::Params<custom::Emotions> {
+ cmd.get<std::string>("emom"), // read cmd args: path to topology IR
+ cmd.get<std::string>("emow"), // read cmd args: path to weights
+ cmd.get<std::string>("emod"), // read cmd args: device specifier
+ };
+
+ // Form a kernel package (with a single OpenCV-based implementation of our
+ // post-processing) and a network package (holding our three networks).
+ auto kernels = cv::gapi::kernels<custom::OCVPostProc>();
+ auto networks = cv::gapi::networks(det_net, age_net, emo_net);
+
+ // Compile our pipeline for a specific input image format (TBD - can be relaxed)
+ // and pass our kernels & networks as parameters.
+ // This is the place where G-API learns which networks & kernels we're actually
+ // operating with (the graph description itself knows nothing about that).
+ auto cc = pp.compileStreaming(cv::GMatDesc{CV_8U,3,cv::Size(1280,720)},
+ cv::compile_args(kernels, networks));
+
+ std::cout << "Reading " << input << std::endl;
+ cc.setSource(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
+
+ Avg avg;
+ avg.start();
+ cc.start();
+
+ cv::Mat frame;
+ std::vector<cv::Rect> faces;
+ std::vector<cv::Mat> out_ages;
+ std::vector<cv::Mat> out_genders;
+ std::vector<cv::Mat> out_emotions;
+ std::size_t frames = 0u;
+
+ // Implement different execution policies depending on the display option
+ // for the best performance.
+ while (cc.running()) {
+ auto out_vector = cv::gout(frame, faces, out_ages, out_genders, out_emotions);
+ if (no_show) {
+ // This is pure video processing; no need to balance against UI rendering.
+ // Use a blocking pull() to obtain data. Break the loop if the stream is over.
+ if (!cc.pull(std::move(out_vector)))
+ break;
+ } else if (!cc.try_pull(std::move(out_vector))) {
+ // Use a non-blocking try_pull() to obtain data.
+ // If there's no data, let UI refresh (and handle keypress)
+ if (cv::waitKey(1) >= 0) break;
+ else continue;
+ }
+ // At this point we have data for sure (obtained in either blocking or non-blocking way).
+ frames++;
+ labels::DrawResults(frame, faces, out_ages, out_genders, out_emotions);
+ labels::DrawFPS(frame, frames, avg.fps(frames));
+ if (!no_show) cv::imshow("Out", frame);
+ }
+ cc.stop();
+ std::cout << "Processed " << frames << " frames in " << avg.elapsed() << std::endl;
+
+ return 0;
+}
+#else
+#include <iostream>
+int main()
+{
+ std::cerr << "This tutorial code requires G-API module "
+ "with Inference Engine backend to run"
+ << std::endl;
+ return 1;
+}
+#endif // HAVE_OPENCV_GAPI
--- /dev/null
+#include "opencv2/opencv_modules.hpp"
+#include <iostream>
+#if defined(HAVE_OPENCV_GAPI)
+
+#include <chrono>
+#include <iomanip>
+
+#include "opencv2/imgproc.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/gapi.hpp"
+#include "opencv2/gapi/core.hpp"
+#include "opencv2/gapi/imgproc.hpp"
+#include "opencv2/gapi/infer.hpp"
+#include "opencv2/gapi/infer/ie.hpp"
+#include "opencv2/gapi/cpu/gcpukernel.hpp"
+#include "opencv2/gapi/streaming/cap.hpp"
+#include "opencv2/highgui.hpp"
+
+const std::string about =
+ "This is an OpenCV-based version of Security Barrier Camera example";
+const std::string keys =
+ "{ h help | | print this help message }"
+ "{ input | | Path to an input video file }"
+ "{ detm | | IE vehicle/license plate detection model IR }"
+ "{ detw | | IE vehicle/license plate detection model weights }"
+ "{ detd | | IE vehicle/license plate detection model device }"
+ "{ vehm | | IE vehicle attributes model IR }"
+ "{ vehw | | IE vehicle attributes model weights }"
+ "{ vehd | | IE vehicle attributes model device }"
+ "{ lprm | | IE license plate recognition model IR }"
+ "{ lprw | | IE license plate recognition model weights }"
+ "{ lprd | | IE license plate recognition model device }"
+ "{ pure | | When set, no output is displayed. Useful for benchmarking }"
+ "{ ser | | When set, runs a regular (serial) pipeline }";
+
+namespace {
+struct Avg {
+ struct Elapsed {
+ explicit Elapsed(double ms) : ss(ms/1000.), mm(static_cast<int>(ss)/60) {}
+ const double ss;
+ const int mm;
+ };
+
+ using MS = std::chrono::duration<double, std::ratio<1, 1000>>;
+ using TS = std::chrono::time_point<std::chrono::high_resolution_clock>;
+ TS started;
+
+ void start() { started = now(); }
+ TS now() const { return std::chrono::high_resolution_clock::now(); }
+ double tick() const { return std::chrono::duration_cast<MS>(now() - started).count(); }
+ Elapsed elapsed() const { return Elapsed{tick()}; }
+ double fps(std::size_t n) const { return static_cast<double>(n) / (tick() / 1000.); }
+};
+std::ostream& operator<<(std::ostream &os, const Avg::Elapsed &e) {
+ os << e.mm << ':' << (e.ss - 60*e.mm);
+ return os;
+}
+} // namespace
+
+
+namespace custom {
+G_API_NET(VehicleLicenseDetector, <cv::GMat(cv::GMat)>, "vehicle-license-plate-detector");
+
+using Attrs = std::tuple<cv::GMat, cv::GMat>;
+G_API_NET(VehicleAttributes, <Attrs(cv::GMat)>, "vehicle-attributes");
+G_API_NET(LPR, <cv::GMat(cv::GMat)>, "license-plate-recognition");
+
+using GVehiclesPlates = std::tuple< cv::GArray<cv::Rect>
+ , cv::GArray<cv::Rect> >;
+G_API_OP_M(ProcessDetections,
+ <GVehiclesPlates(cv::GMat, cv::GMat)>,
+ "custom.security_barrier.detector.postproc") {
+ static std::tuple<cv::GArrayDesc,cv::GArrayDesc>
+ outMeta(const cv::GMatDesc &, const cv::GMatDesc &) {
+ // FIXME: Need to get rid of this - literally there's nothing useful
+ return std::make_tuple(cv::empty_array_desc(), cv::empty_array_desc());
+ }
+};
+
+GAPI_OCV_KERNEL(OCVProcessDetections, ProcessDetections) {
+ static void run(const cv::Mat &in_ssd_result,
+ const cv::Mat &in_frame,
+ std::vector<cv::Rect> &out_vehicles,
+ std::vector<cv::Rect> &out_plates) {
+ const int MAX_PROPOSALS = 200;
+ const int OBJECT_SIZE = 7;
+ const cv::Size upscale = in_frame.size();
+ const cv::Rect surface({0,0}, upscale);
+
+ out_vehicles.clear();
+ out_plates.clear();
+
+ const float *data = in_ssd_result.ptr<float>();
+ for (int i = 0; i < MAX_PROPOSALS; i++) {
+ const float image_id = data[i * OBJECT_SIZE + 0]; // batch id
+ const float label = data[i * OBJECT_SIZE + 1];
+ const float confidence = data[i * OBJECT_SIZE + 2];
+ const float rc_left = data[i * OBJECT_SIZE + 3];
+ const float rc_top = data[i * OBJECT_SIZE + 4];
+ const float rc_right = data[i * OBJECT_SIZE + 5];
+ const float rc_bottom = data[i * OBJECT_SIZE + 6];
+
+ if (image_id < 0.f) { // indicates end of detections
+ break;
+ }
+ if (confidence < 0.5f) { // FIXME: hard-coded threshold
+ continue;
+ }
+
+ cv::Rect rc;
+ rc.x = static_cast<int>(rc_left * upscale.width);
+ rc.y = static_cast<int>(rc_top * upscale.height);
+ rc.width = static_cast<int>(rc_right * upscale.width) - rc.x;
+ rc.height = static_cast<int>(rc_bottom * upscale.height) - rc.y;
+
+ using PT = cv::Point;
+ using SZ = cv::Size;
+ switch (static_cast<int>(label)) {
+ case 1: out_vehicles.push_back(rc & surface); break;
+ case 2: out_plates.emplace_back((rc-PT(15,15)+SZ(30,30)) & surface); break;
+ default: CV_Assert(false && "Unknown object class");
+ }
+ }
+ }
+};
+} // namespace custom
+
+namespace labels {
+const std::string colors[] = {
+ "white", "gray", "yellow", "red", "green", "blue", "black"
+};
+const std::string types[] = {
+ "car", "van", "truck", "bus"
+};
+const std::vector<std::string> license_text = {
+ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
+ "<Anhui>", "<Beijing>", "<Chongqing>", "<Fujian>",
+ "<Gansu>", "<Guangdong>", "<Guangxi>", "<Guizhou>",
+ "<Hainan>", "<Hebei>", "<Heilongjiang>", "<Henan>",
+ "<HongKong>", "<Hubei>", "<Hunan>", "<InnerMongolia>",
+ "<Jiangsu>", "<Jiangxi>", "<Jilin>", "<Liaoning>",
+ "<Macau>", "<Ningxia>", "<Qinghai>", "<Shaanxi>",
+ "<Shandong>", "<Shanghai>", "<Shanxi>", "<Sichuan>",
+ "<Tianjin>", "<Tibet>", "<Xinjiang>", "<Yunnan>",
+ "<Zhejiang>", "<police>",
+ "A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
+ "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T",
+ "U", "V", "W", "X", "Y", "Z"
+};
+namespace {
+void DrawResults(cv::Mat &frame,
+ const std::vector<cv::Rect> &vehicles,
+ const std::vector<cv::Mat> &out_colors,
+ const std::vector<cv::Mat> &out_types,
+ const std::vector<cv::Rect> &plates,
+ const std::vector<cv::Mat> &out_numbers) {
+ CV_Assert(vehicles.size() == out_colors.size());
+ CV_Assert(vehicles.size() == out_types.size());
+ CV_Assert(plates.size() == out_numbers.size());
+
+ for (auto it = vehicles.begin(); it != vehicles.end(); ++it) {
+ const auto idx = std::distance(vehicles.begin(), it);
+ const auto &rc = *it;
+
+ const float *colors_data = out_colors[idx].ptr<float>();
+ const float *types_data = out_types [idx].ptr<float>();
+ const auto color_id = std::max_element(colors_data, colors_data + 7) - colors_data;
+ const auto type_id = std::max_element(types_data, types_data + 4) - types_data;
+
+ const int ATTRIB_OFFSET = 25;
+ cv::rectangle(frame, rc, {0, 255, 0}, 4);
+ cv::putText(frame, labels::colors[color_id],
+ cv::Point(rc.x + 5, rc.y + ATTRIB_OFFSET),
+ cv::FONT_HERSHEY_COMPLEX_SMALL,
+ 1,
+ cv::Scalar(255, 0, 0));
+ cv::putText(frame, labels::types[type_id],
+ cv::Point(rc.x + 5, rc.y + ATTRIB_OFFSET * 2),
+ cv::FONT_HERSHEY_COMPLEX_SMALL,
+ 1,
+ cv::Scalar(255, 0, 0));
+ }
+
+ for (auto it = plates.begin(); it != plates.end(); ++it) {
+ const int MAX_LICENSE = 88;
+ const int LPR_OFFSET = 50;
+
+ const auto &rc = *it;
+ const auto idx = std::distance(plates.begin(), it);
+
+ std::string result;
+ const auto *lpr_data = out_numbers[idx].ptr<float>();
+ for (int i = 0; i < MAX_LICENSE; i++) {
+ if (lpr_data[i] == -1) break;
+ result += labels::license_text[static_cast<size_t>(lpr_data[i])];
+ }
+
+ const int y_pos = std::max(0, rc.y + rc.height - LPR_OFFSET);
+ cv::rectangle(frame, rc, {0, 0, 255}, 4);
+ cv::putText(frame, result,
+ cv::Point(rc.x, y_pos),
+ cv::FONT_HERSHEY_COMPLEX_SMALL,
+ 1,
+ cv::Scalar(0, 0, 255));
+ }
+}
+
+void DrawFPS(cv::Mat &frame, std::size_t n, double fps) {
+ std::ostringstream out;
+ out << "FRAME " << n << ": "
+ << std::fixed << std::setprecision(2) << fps
+ << " FPS (AVG)";
+ cv::putText(frame, out.str(),
+ cv::Point(0, frame.rows),
+ cv::FONT_HERSHEY_SIMPLEX,
+ 1,
+ cv::Scalar(0, 0, 0),
+ 2);
+}
+} // anonymous namespace
+} // namespace labels
+
+int main(int argc, char *argv[])
+{
+ cv::CommandLineParser cmd(argc, argv, keys);
+ cmd.about(about);
+ if (cmd.has("help")) {
+ cmd.printMessage();
+ return 0;
+ }
+ const std::string input = cmd.get<std::string>("input");
+ const bool no_show = cmd.get<bool>("pure");
+
+ cv::GComputation pp([]() {
+ cv::GMat in;
+ cv::GMat detections = cv::gapi::infer<custom::VehicleLicenseDetector>(in);
+ cv::GArray<cv::Rect> vehicles;
+ cv::GArray<cv::Rect> plates;
+ std::tie(vehicles, plates) = custom::ProcessDetections::on(detections, in);
+ cv::GArray<cv::GMat> colors;
+ cv::GArray<cv::GMat> types;
+ std::tie(colors, types) = cv::gapi::infer<custom::VehicleAttributes>(vehicles, in);
+ cv::GArray<cv::GMat> numbers = cv::gapi::infer<custom::LPR>(plates, in);
+ cv::GMat frame = cv::gapi::copy(in); // pass-through the input frame
+ return cv::GComputation(cv::GIn(in),
+ cv::GOut(frame, vehicles, colors, types, plates, numbers));
+ });
+
+ // Note: it might be very useful to have dimensions loaded at this point!
+ auto det_net = cv::gapi::ie::Params<custom::VehicleLicenseDetector> {
+ cmd.get<std::string>("detm"), // path to topology IR
+ cmd.get<std::string>("detw"), // path to weights
+ cmd.get<std::string>("detd"), // device specifier
+ };
+
+ auto attr_net = cv::gapi::ie::Params<custom::VehicleAttributes> {
+ cmd.get<std::string>("vehm"), // path to topology IR
+ cmd.get<std::string>("vehw"), // path to weights
+ cmd.get<std::string>("vehd"), // device specifier
+ }.cfgOutputLayers({ "color", "type" });
+
+ // Fill a special LPR input (seq_ind) with a predefined value
+ // First element is 0.f, the remaining 87 are 1.f
+ const std::vector<int> lpr_seq_dims = {88,1};
+ cv::Mat lpr_seq(lpr_seq_dims, CV_32F, cv::Scalar(1.f));
+ lpr_seq.ptr<float>()[0] = 0.f;
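+ // Note: lpr_seq holds raw 88x1 tensor data, so constInput()'s default
+ // TraitAs::TENSOR hint (see the header change above) is what we want here.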
+ auto lpr_net = cv::gapi::ie::Params<custom::LPR> {
+ cmd.get<std::string>("lprm"), // path to topology IR
+ cmd.get<std::string>("lprw"), // path to weights
+ cmd.get<std::string>("lprd"), // device specifier
+ }.constInput("seq_ind", lpr_seq);
+
+ auto kernels = cv::gapi::kernels<custom::OCVProcessDetections>();
+ auto networks = cv::gapi::networks(det_net, attr_net, lpr_net);
+
+ Avg avg;
+ cv::Mat frame;
+ std::vector<cv::Rect> vehicles, plates;
+ std::vector<cv::Mat> out_colors;
+ std::vector<cv::Mat> out_types;
+ std::vector<cv::Mat> out_numbers;
+ std::size_t frames = 0u;
+
+ std::cout << "Reading " << input << std::endl;
+
+ if (cmd.get<bool>("ser")) {
+ std::cout << "Going serial..." << std::endl;
+ cv::VideoCapture cap(input);
+
+ auto cc = pp.compile(cv::GMatDesc{CV_8U,3,cv::Size(1920,1080)},
+ cv::compile_args(kernels, networks));
+
+ avg.start();
+ while (cv::waitKey(1) < 0) {
+ cap >> frame;
+ if (frame.empty()) break;
+
+ cc(cv::gin(frame),
+ cv::gout(frame, vehicles, out_colors, out_types, plates, out_numbers));
+ frames++;
+ labels::DrawResults(frame, vehicles, out_colors, out_types, plates, out_numbers);
+ labels::DrawFPS(frame, frames, avg.fps(frames));
+ if (!no_show) cv::imshow("Out", frame);
+ }
+ } else {
+ std::cout << "Going pipelined..." << std::endl;
+
+ auto cc = pp.compileStreaming(cv::GMatDesc{CV_8U,3,cv::Size(1920,1080)},
+ cv::compile_args(kernels, networks));
+
+ cc.setSource(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
+
+ avg.start();
+ cc.start();
+
+ // Implement different execution policies depending on the display option
+ // for the best performance.
+ while (cc.running()) {
+ auto out_vector = cv::gout(frame, vehicles, out_colors, out_types, plates, out_numbers);
+ if (no_show) {
+ // This is pure video processing; no need to balance against UI rendering.
+ // Use a blocking pull() to obtain data. Break the loop if the stream is over.
+ if (!cc.pull(std::move(out_vector)))
+ break;
+ } else if (!cc.try_pull(std::move(out_vector))) {
+ // Use a non-blocking try_pull() to obtain data.
+ // If there's no data, let UI refresh (and handle keypress)
+ if (cv::waitKey(1) >= 0) break;
+ else continue;
+ }
+ // At this point we have data for sure (obtained in either blocking or non-blocking way).
+ frames++;
+ labels::DrawResults(frame, vehicles, out_colors, out_types, plates, out_numbers);
+ labels::DrawFPS(frame, frames, avg.fps(frames));
+ if (!no_show) cv::imshow("Out", frame);
+ }
+ cc.stop();
+ }
+ std::cout << "Processed " << frames << " frames in " << avg.elapsed() << std::endl;
+
+ return 0;
+}
+#else
+int main()
+{
+ std::cerr << "This tutorial code requires G-API module "
+ "with Inference Engine backend to run"
+ << std::endl;
+ return 1;
+}
+#endif // HAVE_OPENCV_GAPI