modules/gapi/src/backends/fluid/gfluidbackend.cpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4 //
   5 // Copyright (C) 2018-2020 Intel Corporation
   6
   7
   8 #include "precomp.hpp"
   9
  10 #include <functional>
  11 #include <iostream>
  12 #include <iomanip> // std::fixed, std::setprecision
  13 #include <set>
  14 #include <unordered_set>
  15 #include <stack>
  16
  17 #include <ade/util/algorithm.hpp>
  18 #include <ade/util/chain_range.hpp>
  19 #include <ade/util/iota_range.hpp>
  20 #include <ade/util/range.hpp>
  21 #include <ade/util/zip_range.hpp>
  22
  23 #include <ade/typed_graph.hpp>
  24 #include <ade/execution_engine/execution_engine.hpp>
  25
  26 #include <opencv2/gapi/gcommon.hpp>
  27 #include "logger.hpp"
  28
  29 #include <opencv2/gapi/gmat.hpp>    //for version of descr_of
  30 // PRIVATE STUFF!
  31 #include "compiler/gobjref.hpp"
  32 #include "compiler/gmodel.hpp"
  33
  34 #include "backends/fluid/gfluidbuffer_priv.hpp"
  35 #include "backends/fluid/gfluidbackend.hpp"
  36
  37 #include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK!
  38
  39 // FIXME: Is there a way to take a typed graph (our GModel),
  40 // and create a new typed graph _ATOP_ of that (by extending with a couple of
  41 // new types?).
  42 // Alternatively, is there a way to compose types graphs?
  43 //
  44 // If not, we need to introduce that!
// Typed view over an ade::Graph exposing the Fluid-specific metadata types
// listed below; this is the mutable flavour (used to attach FluidUnit
// metadata to operation nodes in this file).
using GFluidModel = ade::TypedGraph
    < cv::gimpl::FluidUnit
    , cv::gimpl::FluidData
    , cv::gimpl::Protocol
    , cv::gimpl::FluidUseOwnBorderBuffer
    >;
  51
  52 // FIXME: Same issue with Typed and ConstTyped
// Const counterpart of the Fluid typed graph: the same metadata type list,
// wrapped in ade::ConstTypedGraph (used in this file to query metadata only).
using GConstFluidModel = ade::ConstTypedGraph
    < cv::gimpl::FluidUnit
    , cv::gimpl::FluidData
    , cv::gimpl::Protocol
    , cv::gimpl::FluidUseOwnBorderBuffer
    >;
  59
  60 // FluidBackend middle-layer implementation ////////////////////////////////////
  61 namespace
  62 {
  63     class GFluidBackendImpl final: public cv::gapi::GBackend::Priv
  64     {
  65         virtual void unpackKernel(ade::Graph            &graph,
  66                                   const ade::NodeHandle &op_node,
  67                                   const cv::GKernelImpl &impl) override
  68         {
  69             GFluidModel fm(graph);
  70             auto fluid_impl = cv::util::any_cast<cv::GFluidKernel>(impl.opaque);
  71             fm.metadata(op_node).set(cv::gimpl::FluidUnit{fluid_impl, {}, 0, -1, {}, 0.0});
  72         }
  73
  74         virtual EPtr compile(const ade::Graph &graph,
  75                              const cv::GCompileArgs &args,
  76                              const std::vector<ade::NodeHandle> &nodes) const override
  77         {
  78             using namespace cv::gimpl;
  79             GModel::ConstGraph g(graph);
  80             auto isl_graph = g.metadata().get<IslandModel>().model;
  81             GIslandModel::Graph gim(*isl_graph);
  82
  83             const auto num_islands = std::count_if
  84                 (gim.nodes().begin(), gim.nodes().end(),
  85                  [&](const ade::NodeHandle &nh) {
  86                     return gim.metadata(nh).get<NodeKind>().k == NodeKind::ISLAND;
  87                 });
  88
  89             const auto out_rois = cv::gapi::getCompileArg<cv::GFluidOutputRois>(args);
  90             if (num_islands > 1 && out_rois.has_value())
  91                 cv::util::throw_error(std::logic_error("GFluidOutputRois feature supports only one-island graphs"));
  92
  93             auto rois = out_rois.value_or(cv::GFluidOutputRois());
  94
  95             auto graph_data = fluidExtractInputDataFromGraph(graph, nodes);
  96             const auto parallel_out_rois = cv::gapi::getCompileArg<cv::GFluidParallelOutputRois>(args);
  97             const auto gpfor             = cv::gapi::getCompileArg<cv::GFluidParallelFor>(args);
  98
  99 #if !defined(GAPI_STANDALONE)
 100             auto default_pfor = [](std::size_t count, std::function<void(std::size_t)> f){
 101                 struct Body : cv::ParallelLoopBody {
 102                     decltype(f) func;
 103                     Body( decltype(f) && _f) : func(_f){}
 104                     virtual void operator() (const cv::Range& r) const CV_OVERRIDE
 105                     {
 106                         for (std::size_t i : ade::util::iota(r.start, r.end))
 107                         {
 108                             func(i);
 109                         }
 110                     }
 111                 };
 112                 cv::parallel_for_(cv::Range{0,static_cast<int>(count)}, Body{std::move(f)});
 113             };
 114 #else
 115             auto default_pfor = [](std::size_t count, std::function<void(std::size_t)> f){
 116                 for (auto i : ade::util::iota(count)){
 117                     f(i);
 118                 }
 119             };
 120 #endif
 121
 122             auto pfor  = gpfor.has_value() ? gpfor.value().parallel_for : default_pfor;
 123
 124             return parallel_out_rois.has_value() ?
 125                        EPtr{new cv::gimpl::GParallelFluidExecutable (graph, graph_data, std::move(parallel_out_rois.value().parallel_rois), pfor)}
 126                      : EPtr{new cv::gimpl::GFluidExecutable         (graph, graph_data, std::move(rois.rois))}
 127             ;
 128         }
 129
 130         virtual void addMetaSensitiveBackendPasses(ade::ExecutionEngineSetupContext &ectx) override;
 131
 132     };
 133 }
 134
 135 cv::gapi::GBackend cv::gapi::fluid::backend()
 136 {
 137     static cv::gapi::GBackend this_backend(std::make_shared<GFluidBackendImpl>());
 138     return this_backend;
 139 }
 140
 141 // FluidAgent implementation ///////////////////////////////////////////////////
 142
 143 namespace cv { namespace gimpl {
 144 struct FluidMapper
 145 {
 146     FluidMapper(double ratio, int lpi) : m_ratio(ratio), m_lpi(lpi) {}
 147     virtual ~FluidMapper() = default;
 148     virtual int firstWindow(int outCoord, int lpi) const = 0;
 149     virtual std::pair<int,int> linesReadAndNextWindow(int outCoord, int lpi) const = 0;
 150
 151 protected:
 152     double m_ratio = 0.0;
 153     int    m_lpi   = 0;
 154 };
 155
 156 struct FluidDownscaleMapper : public FluidMapper
 157 {
 158     virtual int firstWindow(int outCoord, int lpi) const override;
 159     virtual std::pair<int,int> linesReadAndNextWindow(int outCoord, int lpi) const override;
 160     using FluidMapper::FluidMapper;
 161 };
 162
 163 struct FluidUpscaleMapper : public FluidMapper
 164 {
 165     virtual int firstWindow(int outCoord, int lpi) const override;
 166     virtual std::pair<int,int> linesReadAndNextWindow(int outCoord, int lpi) const override;
 167     FluidUpscaleMapper(double ratio, int lpi, int inHeight) : FluidMapper(ratio, lpi), m_inHeight(inHeight) {}
 168 private:
 169     int m_inHeight = 0;
 170 };
 171
 172 struct FluidFilterAgent : public FluidAgent
 173 {
 174 private:
 175     virtual int firstWindow(std::size_t inPort) const override;
 176     virtual std::pair<int,int> linesReadAndnextWindow(std::size_t inPort) const override;
 177     virtual void setRatio(double) override { /* nothing */ }
 178 public:
 179     using FluidAgent::FluidAgent;
 180     int m_window;
 181
 182     FluidFilterAgent(const ade::Graph &g, ade::NodeHandle nh)
 183         : FluidAgent(g, nh)
 184         , m_window(GConstFluidModel(g).metadata(nh).get<FluidUnit>().window)
 185     {}
 186 };
 187
 188 struct FluidResizeAgent : public FluidAgent
 189 {
 190 private:
 191     virtual int firstWindow(std::size_t inPort) const override;
 192     virtual std::pair<int,int> linesReadAndnextWindow(std::size_t inPort) const override;
 193     virtual void setRatio(double ratio) override;
 194
 195     std::unique_ptr<FluidMapper> m_mapper;
 196 public:
 197     using FluidAgent::FluidAgent;
 198 };
 199
 200 struct Fluid420toRGBAgent : public FluidAgent
 201 {
 202 private:
 203     virtual int firstWindow(std::size_t inPort) const override;
 204     virtual std::pair<int,int> linesReadAndnextWindow(std::size_t inPort) const override;
 205     virtual void setRatio(double) override { /* nothing */ }
 206 public:
 207     using FluidAgent::FluidAgent;
 208 };
 209 }} // namespace cv::gimpl
 210
 211 cv::gimpl::FluidAgent::FluidAgent(const ade::Graph &g, ade::NodeHandle nh)
 212     : k(GConstFluidModel(g).metadata(nh).get<FluidUnit>().k)        // init(0)
 213     , op_handle(nh)                                                 // init(1)
 214     , op_name(GModel::ConstGraph(g).metadata(nh).get<Op>().k.name)  // init(2)
 215 {
 216     std::set<int> out_w;
 217     std::set<int> out_h;
 218     GModel::ConstGraph cm(g);
 219     for (auto out_data : nh->outNodes())
 220     {
 221         const auto  &d      = cm.metadata(out_data).get<Data>();
 222         cv::GMatDesc d_meta = cv::util::get<cv::GMatDesc>(d.meta);
 223         out_w.insert(d_meta.size.width);
 224         out_h.insert(d_meta.size.height);
 225     }
 226
 227     // Different output sizes are not supported
 228     GAPI_Assert(out_w.size() == 1 && out_h.size() == 1);
 229 }
 230
 231 void cv::gimpl::FluidAgent::reset()
 232 {
 233     m_producedLines = 0;
 234
 235     for (const auto& it : ade::util::indexed(in_views))
 236     {
 237         auto& v = ade::util::value(it);
 238         if (v)
 239         {
 240             auto idx = ade::util::index(it);
 241             auto lines = firstWindow(idx);
 242             v.priv().reset(lines);
 243         }
 244     }
 245 }
 246
 247 namespace {
 248 static int calcGcd (int n1, int n2)
 249 {
 250     return (n2 == 0) ? n1 : calcGcd (n2, n1 % n2);
 251 }
 252
// This is an empirical formula; it is not 100% guaranteed
// to produce correct results in all possible cases
 255 // FIXME:
 256 // prove correctness or switch to some trusted method
 257 //
 258 // When performing resize input/output pixels form a cyclic
 259 // pattern where inH/gcd input pixels are mapped to outH/gcd
 260 // output pixels (pattern repeats gcd times).
 261 //
// Output pixel can partially cover some of the input pixels.
 263 // There are 3 possible cases:
 264 //
 265 // :___ ___:    :___ _:_ ___:    :___ __: ___ :__ ___:
 266 // |___|___|    |___|_:_|___|    |___|__:|___|:__|___|
 267 // :       :    :     :     :    :      :     :      :
 268 //
 269 // 1) No partial coverage, max window = scaleFactor;
 270 // 2) Partial coverage occurs on the one side of the output pixel,
 271 //    max window = scaleFactor + 1;
 272 // 3) Partial coverage occurs at both sides of the output pixel,
 273 //    max window = scaleFactor + 2;
 274 //
// The type of coverage is determined by the remainder of the
// inPeriodH/outPeriodH division, but this is a heuristic
// (although no counterexample has been found so far).
 278
 279 static int calcResizeWindow(int inH, int outH)
 280 {
 281     GAPI_Assert(inH >= outH);
 282     auto gcd = calcGcd(inH, outH);
 283     int  inPeriodH =  inH/gcd;
 284     int outPeriodH = outH/gcd;
 285     int scaleFactor = inPeriodH / outPeriodH;
 286
 287     switch ((inPeriodH) % (outPeriodH))
 288     {
 289     case 0:  return scaleFactor;     break;
 290     case 1:  return scaleFactor + 1; break;
 291     default: return scaleFactor + 2;
 292     }
 293 }
 294
 295 static int maxLineConsumption(const cv::GFluidKernel::Kind kind, int window, int inH, int outH, int lpi, std::size_t inPort)
 296 {
 297     switch (kind)
 298     {
 299     case cv::GFluidKernel::Kind::Filter: return window + lpi - 1; break;
 300     case cv::GFluidKernel::Kind::Resize:
 301     {
 302         if  (inH >= outH)
 303         {
 304             // FIXME:
 305             // This is a suboptimal value, can be reduced
 306             return calcResizeWindow(inH, outH) * lpi;
 307         }
 308         else
 309         {
 310             // FIXME:
 311             // This is a suboptimal value, can be reduced
 312             return (inH == 1) ? 1 : 2 + lpi - 1;
 313         }
 314     } break;
 315     case cv::GFluidKernel::Kind::YUV420toRGB: return inPort == 0 ? 2 : 1; break;
 316     default: GAPI_Assert(false); return 0;
 317     }
 318 }
 319
 320 static int borderSize(const cv::GFluidKernel::Kind kind, int window)
 321 {
 322     switch (kind)
 323     {
 324     case cv::GFluidKernel::Kind::Filter: return (window - 1) / 2; break;
 325     // Resize never reads from border pixels
 326     case cv::GFluidKernel::Kind::Resize: return 0; break;
 327     case cv::GFluidKernel::Kind::YUV420toRGB: return 0; break;
 328     default: GAPI_Assert(false); return 0;
 329     }
 330 }
 331
 332 inline double inCoord(int outIdx, double ratio)
 333 {
 334     return outIdx * ratio;
 335 }
 336
 337 inline int windowStart(int outIdx, double ratio)
 338 {
 339     return static_cast<int>(inCoord(outIdx, ratio) + 1e-3);
 340 }
 341
 342 inline int windowEnd(int outIdx, double ratio)
 343 {
 344     return static_cast<int>(std::ceil(inCoord(outIdx + 1, ratio) - 1e-3));
 345 }
 346
 347 inline double inCoordUpscale(int outCoord, double ratio)
 348 {
 349     // Calculate the projection of output pixel's center
 350     return (outCoord + 0.5) * ratio - 0.5;
 351 }
 352
 353 inline int upscaleWindowStart(int outCoord, double ratio)
 354 {
 355     int start = static_cast<int>(inCoordUpscale(outCoord, ratio));
 356     GAPI_DbgAssert(start >= 0);
 357     return start;
 358 }
 359
 360 inline int upscaleWindowEnd(int outCoord, double ratio, int inSz)
 361 {
 362     int end = static_cast<int>(std::ceil(inCoordUpscale(outCoord, ratio)) + 1);
 363     if (end > inSz)
 364     {
 365         end = inSz;
 366     }
 367     return end;
 368 }
 369 } // anonymous namespace
 370
 371 int cv::gimpl::FluidDownscaleMapper::firstWindow(int outCoord, int lpi) const
 372 {
 373     return windowEnd(outCoord + lpi - 1, m_ratio) - windowStart(outCoord, m_ratio);
 374 }
 375
 376 std::pair<int,int> cv::gimpl::FluidDownscaleMapper::linesReadAndNextWindow(int outCoord, int lpi) const
 377 {
 378     auto nextStartIdx = outCoord + 1 + m_lpi - 1;
 379     auto nextEndIdx   = nextStartIdx + lpi - 1;
 380
 381     auto currStart = windowStart(outCoord, m_ratio);
 382     auto nextStart = windowStart(nextStartIdx, m_ratio);
 383     auto nextEnd   = windowEnd(nextEndIdx, m_ratio);
 384
 385     auto lines_read = nextStart - currStart;
 386     auto next_window = nextEnd - nextStart;
 387
 388     return std::make_pair(lines_read, next_window);
 389 }
 390
 391 int cv::gimpl::FluidUpscaleMapper::firstWindow(int outCoord, int lpi) const
 392 {
 393     return upscaleWindowEnd(outCoord + lpi - 1, m_ratio, m_inHeight) - upscaleWindowStart(outCoord, m_ratio);
 394 }
 395
 396 std::pair<int,int> cv::gimpl::FluidUpscaleMapper::linesReadAndNextWindow(int outCoord, int lpi) const
 397 {
 398     auto nextStartIdx = outCoord + 1 + m_lpi - 1;
 399     auto nextEndIdx   = nextStartIdx + lpi - 1;
 400
 401     auto currStart = upscaleWindowStart(outCoord, m_ratio);
 402     auto nextStart = upscaleWindowStart(nextStartIdx, m_ratio);
 403     auto nextEnd   = upscaleWindowEnd(nextEndIdx, m_ratio, m_inHeight);
 404
 405     auto lines_read = nextStart - currStart;
 406     auto next_window = nextEnd - nextStart;
 407
 408     return std::make_pair(lines_read, next_window);
 409 }
 410
 411 int cv::gimpl::FluidFilterAgent::firstWindow(std::size_t) const
 412 {
 413     int lpi = std::min(k.m_lpi, m_outputLines - m_producedLines);
 414     return m_window + lpi - 1;
 415 }
 416
 417 std::pair<int,int> cv::gimpl::FluidFilterAgent::linesReadAndnextWindow(std::size_t) const
 418 {
 419     int lpi = std::min(k.m_lpi, m_outputLines - m_producedLines - k.m_lpi);
 420     return std::make_pair(k.m_lpi, m_window - 1 + lpi);
 421 }
 422
 423 int cv::gimpl::FluidResizeAgent::firstWindow(std::size_t) const
 424 {
 425     auto outIdx = out_buffers[0]->priv().y();
 426     auto lpi = std::min(m_outputLines - m_producedLines, k.m_lpi);
 427     return m_mapper->firstWindow(outIdx, lpi);
 428 }
 429
 430 std::pair<int,int> cv::gimpl::FluidResizeAgent::linesReadAndnextWindow(std::size_t) const
 431 {
 432     auto outIdx = out_buffers[0]->priv().y();
 433     auto lpi = std::min(m_outputLines - m_producedLines - k.m_lpi, k.m_lpi);
 434     return m_mapper->linesReadAndNextWindow(outIdx, lpi);
 435 }
 436
 437 int cv::gimpl::Fluid420toRGBAgent::firstWindow(std::size_t inPort) const
 438 {
 439     // 2 lines for Y, 1 for UV
 440     return inPort == 0 ? 2 : 1;
 441 }
 442
 443 std::pair<int,int> cv::gimpl::Fluid420toRGBAgent::linesReadAndnextWindow(std::size_t inPort) const
 444 {
 445     // 2 lines for Y, 1 for UV
 446     return inPort == 0 ? std::make_pair(2, 2) : std::make_pair(1, 1);
 447 }
 448
 449 void cv::gimpl::FluidResizeAgent::setRatio(double ratio)
 450 {
 451     if (ratio >= 1.0)
 452     {
 453         m_mapper.reset(new FluidDownscaleMapper(ratio, k.m_lpi));
 454     }
 455     else
 456     {
 457         m_mapper.reset(new FluidUpscaleMapper(ratio, k.m_lpi, in_views[0].meta().size.height));
 458     }
 459 }
 460
 461 bool cv::gimpl::FluidAgent::canRead() const
 462 {
 463     // An agent can work if every input buffer have enough data to start
 464     for (const auto& in_view : in_views)
 465     {
 466         if (in_view)
 467         {
 468             if (!in_view.ready())
 469                 return false;
 470         }
 471     }
 472     return true;
 473 }
 474
 475 bool cv::gimpl::FluidAgent::canWrite() const
 476 {
 477     // An agent can work if there is space to write in its output
 478     // allocated buffers
 479     GAPI_DbgAssert(!out_buffers.empty());
 480     auto out_begin = out_buffers.begin();
 481     auto out_end   = out_buffers.end();
 482     if (k.m_scratch) out_end--;
 483     for (auto it = out_begin; it != out_end; ++it)
 484     {
 485         if ((*it)->priv().full())
 486         {
 487             return false;
 488         }
 489     }
 490     return true;
 491 }
 492
 493 bool cv::gimpl::FluidAgent::canWork() const
 494 {
 495     return canRead() && canWrite();
 496 }
 497
 498 void cv::gimpl::FluidAgent::doWork()
 499 {
 500     GAPI_DbgAssert(m_outputLines > m_producedLines);
 501     for (auto& in_view : in_views)
 502     {
 503         if (in_view) in_view.priv().prepareToRead();
 504     }
 505
 506     k.m_f(in_args, out_buffers);
 507
 508     for (const auto& it : ade::util::indexed(in_views))
 509     {
 510         auto& in_view = ade::util::value(it);
 511
 512         if (in_view)
 513         {
 514             auto idx = ade::util::index(it);
 515             auto pair = linesReadAndnextWindow(idx);
 516             in_view.priv().readDone(pair.first, pair.second);
 517         };
 518     }
 519
 520     for (auto out_buf : out_buffers)
 521     {
 522         out_buf->priv().writeDone();
 523         // FIXME WARNING: Scratch buffers rotated here too!
 524     }
 525
 526     m_producedLines += k.m_lpi;
 527 }
 528
 529 bool cv::gimpl::FluidAgent::done() const
 530 {
 531     // m_producedLines is a multiple of LPI, while original
 532     // height may be not.
 533     return m_producedLines >= m_outputLines;
 534 }
 535
 536 void cv::gimpl::FluidAgent::debug(std::ostream &os)
 537 {
 538     os << "Fluid Agent " << std::hex << this
 539        << " (" << op_name << ") --"
 540        << " canWork=" << std::boolalpha << canWork()
 541        << " canRead=" << std::boolalpha << canRead()
 542        << " canWrite=" << std::boolalpha << canWrite()
 543        << " done="    << done()
 544        << " lines="   << std::dec << m_producedLines << "/" << m_outputLines
 545        << " {{\n";
 546     for (auto out_buf : out_buffers)
 547     {
 548         out_buf->debug(os);
 549     }
 550     std::cout << "}}" << std::endl;
 551 }
 552
// GFluidExecutable implementation /////////////////////////////////////////////
 554
// Infers the region of interest (ROI) every internal buffer has to produce,
// given the ROIs requested for the graph outputs.
//
//   readStarts - [out] per-buffer line index the reader starts from;
//                indexed by the buffer id obtained from m_id_map
//   rois       - [out] per-buffer ROI; indexed the same way. An entry equal
//                to a default cv::Rect{} is treated as "not inferred yet"
//   out_rois   - requested output ROIs, one per protocol output, or empty
//                (in which case nothing is inferred here)
//
// The graph is walked backwards, from the outputs towards the inputs:
// the ROI of a data node is converted to the ROI required from each input
// of its producer according to the kind of the producing kernel.
void cv::gimpl::GFluidExecutable::initBufferRois(std::vector<int>& readStarts,
                                                 std::vector<cv::Rect>& rois,
                                                 const std::vector<cv::Rect>& out_rois)
{
    GConstFluidModel fg(m_g);
    auto proto = m_gm.metadata().get<Protocol>();
    std::stack<ade::NodeHandle> nodesToVisit;

    // FIXME?
    // There is possible case when user pass the vector full of default Rect{}-s,
    // Can be diagnosed and handled appropriately
    if (proto.outputs.size() != out_rois.size())
    {
        // The only supported size mismatch is "no output ROIs passed at all"
        GAPI_Assert(out_rois.size() == 0);
        // No inference required, buffers will obtain roi from meta
        return;
    }

    // First, initialize rois for output nodes, add them to traversal stack
    for (const auto& it : ade::util::indexed(proto.out_nhs))
    {
        const auto idx = ade::util::index(it);
        const auto nh  = ade::util::value(it);

        const auto &d  = m_gm.metadata(nh).get<Data>();

        // This is not our output
        if (m_id_map.count(d.rc) == 0)
        {
            continue;
        }

        if (d.shape == GShape::GMAT)
        {
            auto desc = util::get<GMatDesc>(d.meta);
            auto id = m_id_map.at(d.rc);
            readStarts[id] = 0;

            const auto& out_roi = out_rois[idx];
            if (out_roi == cv::Rect{})
            {
                // Default ROI means "the whole output image"
                rois[id] = cv::Rect{ 0, 0, desc.size.width, desc.size.height };
            }
            else
            {
                GAPI_Assert(out_roi.height > 0);
                GAPI_Assert(out_roi.y + out_roi.height <= desc.size.height);

                // Only slices are supported at the moment
                GAPI_Assert(out_roi.x == 0);
                GAPI_Assert(out_roi.width == desc.size.width);
                rois[id] = out_roi;
            }

            nodesToVisit.push(nh);
        }
    }

    // Traverse the graph backwards from each of the output nodes
    // (pending nodes are kept on a stack)
    // And extend roi of buffers by border_size
    // Each node can be visited multiple times
    // (if node has been already visited, the check that inferred rois are the same is performed)
    while (!nodesToVisit.empty())
    {
        const auto startNode = nodesToVisit.top();
        nodesToVisit.pop();

        if (!startNode->inNodes().empty())
        {
            // A data node is expected to have exactly one producer operation
            GAPI_Assert(startNode->inNodes().size() == 1);
            const auto& oh = startNode->inNodes().front();

            const auto& data = m_gm.metadata(startNode).get<Data>();
            // only GMats participate in the process so it's valid to obtain GMatDesc
            const auto& meta = util::get<GMatDesc>(data.meta);

            for (const auto& in_edge : oh->inEdges())
            {
                const auto& in_node = in_edge->srcNode();
                const auto& in_data = m_gm.metadata(in_node).get<Data>();

                if (in_data.shape == GShape::GMAT && fg.metadata(in_node).contains<FluidData>())
                {
                    const auto& in_meta = util::get<GMatDesc>(in_data.meta);
                    const auto& fd = fg.metadata(in_node).get<FluidData>();

                    // Extends the ROI vertically by the border size `b` on both
                    // sides and clips the result to [0, max_height)
                    auto adjFilterRoi = [](cv::Rect produced, int b, int max_height) {
                        // Extend with border roi which should be produced, crop to logical image size
                        cv::Rect roi = {produced.x, produced.y - b, produced.width, produced.height + 2*b};
                        cv::Rect fullImg{ 0, 0, produced.width, max_height };
                        return roi & fullImg;
                    };

                    // Maps the produced (output) ROI of a resize to the input ROI
                    // it is calculated from, independently along each axis
                    auto adjResizeRoi = [](cv::Rect produced, cv::Size inSz, cv::Size outSz) {
                        // Returns the [w0, w1) input range covering producedSz
                        // output elements starting at outCoord
                        auto map = [](int outCoord, int producedSz, int inSize, int outSize) {
                            double ratio = (double)inSize / outSize;
                            int w0 = 0, w1 = 0;
                            if (ratio >= 1.0)
                            {
                                // Downscale (or same size)
                                w0 = windowStart(outCoord, ratio);
                                w1 = windowEnd  (outCoord + producedSz - 1, ratio);
                            }
                            else
                            {
                                // Upscale
                                w0 = upscaleWindowStart(outCoord, ratio);
                                w1 = upscaleWindowEnd(outCoord + producedSz - 1, ratio, inSize);
                            }
                            return std::make_pair(w0, w1);
                        };

                        auto mapY = map(produced.y, produced.height, inSz.height, outSz.height);
                        auto y0 = mapY.first;
                        auto y1 = mapY.second;

                        auto mapX = map(produced.x, produced.width, inSz.width, outSz.width);
                        auto x0 = mapX.first;
                        auto x1 = mapX.second;

                        cv::Rect roi = {x0, y0, x1 - x0, y1 - y0};
                        return roi;
                    };

                    // Maps the produced ROI of a YUV420toRGB kernel to the ROI of
                    // the given input port: port 0 keeps the ROI as is, ports 1
                    // and 2 get it halved in both dimensions.
                    // All ROI components must be even.
                    auto adj420Roi = [&](cv::Rect produced, std::size_t port) {
                        GAPI_Assert(produced.x % 2 == 0);
                        GAPI_Assert(produced.y % 2 == 0);
                        GAPI_Assert(produced.width % 2 == 0);
                        GAPI_Assert(produced.height % 2 == 0);

                        cv::Rect roi;
                        switch (port) {
                        case 0: roi = produced; break;
                        case 1:
                        case 2: roi = cv::Rect{ produced.x/2, produced.y/2, produced.width/2, produced.height/2 }; break;
                        default: GAPI_Assert(false);
                        }
                        return roi;
                    };

                    // ROI which the currently visited data node has to produce
                    cv::Rect produced = rois[m_id_map.at(data.rc)];

                    // Apply resize-specific roi transformations
                    cv::Rect resized;
                    switch (fg.metadata(oh).get<FluidUnit>().k.m_kind)
                    {
                    case GFluidKernel::Kind::Filter:      resized = produced; break;
                    case GFluidKernel::Kind::Resize:      resized = adjResizeRoi(produced, in_meta.size, meta.size); break;
                    case GFluidKernel::Kind::YUV420toRGB: resized = adj420Roi(produced, m_gm.metadata(in_edge).get<Input>().port); break;
                    default: GAPI_Assert(false);
                    }

                    // All below transformations affect roi of the writer, preserve read start position here
                    int readStart = resized.y;

                    // Extend required input roi (both y and height) to be even if it's produced by CS420toRGB
                    if (!in_node->inNodes().empty()) {
                        auto in_data_producer = in_node->inNodes().front();
                        if (fg.metadata(in_data_producer).get<FluidUnit>().k.m_kind == GFluidKernel::Kind::YUV420toRGB) {
                            if (resized.y % 2 != 0) {
                                resized.y--;
                                resized.height++;
                            }

                            if (resized.height % 2 != 0) {
                                resized.height++;
                            }
                        }
                    }

                    // Apply filter-specific roi transformations, clip to image size
                    // Note: done even for non-filter kernels as applies border-related transformations
                    // (required in the case when there are multiple readers with different border requirements)
                    auto roi = adjFilterRoi(resized, fd.border_size, in_meta.size.height);

                    auto in_id = m_id_map.at(in_data.rc);
                    if (rois[in_id] == cv::Rect{})
                    {
                        // First visit: remember the inferred values
                        readStarts[in_id] = readStart;
                        rois[in_id] = roi;
                        // Continue traverse on internal (w.r.t Island) data nodes only.
                        if (fd.internal) nodesToVisit.push(in_node);
                    }
                    else
                    {
                        // Repeated visit: all readers must agree on the inferred values
                        GAPI_Assert(readStarts[in_id] == readStart);
                        GAPI_Assert(rois[in_id] == roi);
                    }
                } // if (in_data.shape == GShape::GMAT)
            } // for (const auto& in_edge : oh->inEdges())
        } // if (!startNode->inNodes().empty())
    } // while (!nodesToVisit.empty())
}
 746
 747 cv::gimpl::FluidGraphInputData cv::gimpl::fluidExtractInputDataFromGraph(const ade::Graph &g, const std::vector<ade::NodeHandle> &nodes)
 748 {
 749     decltype(FluidGraphInputData::m_agents_data)       agents_data;
 750     decltype(FluidGraphInputData::m_scratch_users)     scratch_users;
 751     decltype(FluidGraphInputData::m_id_map)            id_map;
 752     decltype(FluidGraphInputData::m_all_gmat_ids)      all_gmat_ids;
 753     std::size_t                                        mat_count = 0;
 754
 755     GConstFluidModel fg(g);
 756     GModel::ConstGraph m_gm(g);
 757
 758     // Initialize vector of data buffers, build list of operations
 759     // FIXME: There _must_ be a better way to [query] count number of DATA nodes
 760
 761     auto grab_mat_nh = [&](ade::NodeHandle nh) {
 762         auto rc = m_gm.metadata(nh).get<Data>().rc;
 763         if (id_map.count(rc) == 0)
 764         {
 765             all_gmat_ids[mat_count] = nh;
 766             id_map[rc] = mat_count++;
 767         }
 768     };
 769
 770     std::size_t last_agent = 0;
 771
 772     for (const auto &nh : nodes)
 773     {
 774         switch (m_gm.metadata(nh).get<NodeType>().t)
 775         {
 776         case NodeType::DATA:
 777             if (m_gm.metadata(nh).get<Data>().shape == GShape::GMAT)
 778                 grab_mat_nh(nh);
 779             break;
 780
 781         case NodeType::OP:
 782         {
 783             const auto& fu = fg.metadata(nh).get<FluidUnit>();
 784
 785             agents_data.push_back({fu.k.m_kind, nh, {}, {}});
 786             // NB.: in_buffer_ids size is equal to Arguments size, not Edges size!!!
 787             agents_data.back().in_buffer_ids.resize(m_gm.metadata(nh).get<Op>().args.size(), -1);
 788             for (auto eh : nh->inEdges())
 789             {
 790                 // FIXME Only GMats are currently supported (which can be represented
 791                 // as fluid buffers
 792                 if (m_gm.metadata(eh->srcNode()).get<Data>().shape == GShape::GMAT)
 793                 {
 794                     const auto in_port = m_gm.metadata(eh).get<Input>().port;
 795                     const int  in_buf  = m_gm.metadata(eh->srcNode()).get<Data>().rc;
 796
 797                     agents_data.back().in_buffer_ids[in_port] = in_buf;
 798                     grab_mat_nh(eh->srcNode());
 799                 }
 800             }
 801             // FIXME: Assumption that all operation outputs MUST be connected
 802             agents_data.back().out_buffer_ids.resize(nh->outEdges().size(), -1);
 803             for (auto eh : nh->outEdges())
 804             {
 805                 const auto& data = m_gm.metadata(eh->dstNode()).get<Data>();
 806                 const auto out_port = m_gm.metadata(eh).get<Output>().port;
 807                 const int  out_buf  = data.rc;
 808
 809                 agents_data.back().out_buffer_ids[out_port] = out_buf;
 810                 if (data.shape == GShape::GMAT) grab_mat_nh(eh->dstNode());
 811             }
 812             if (fu.k.m_scratch)
 813                 scratch_users.push_back(last_agent);
 814             last_agent++;
 815             break;
 816         }
 817         default: GAPI_Assert(false);
 818         }
 819     }
 820
 821     // Check that IDs form a continiuos set (important for further indexing)
 822     GAPI_Assert(id_map.size() >  0);
 823     GAPI_Assert(id_map.size() == static_cast<size_t>(mat_count));
 824
 825     return FluidGraphInputData {std::move(agents_data), std::move(scratch_users), std::move(id_map), std::move(all_gmat_ids), mat_count};
 826 }
 827
 828 cv::gimpl::GFluidExecutable::GFluidExecutable(const ade::Graph                       &g,
 829                                               const cv::gimpl::FluidGraphInputData   &traverse_res,
 830                                               const std::vector<cv::Rect> &outputRois)
 831     : m_g(g), m_gm(m_g),
 832       m_num_int_buffers (traverse_res.m_mat_count),
 833       m_scratch_users   (traverse_res.m_scratch_users),
 834       m_id_map          (traverse_res.m_id_map),
 835       m_all_gmat_ids    (traverse_res.m_all_gmat_ids),
 836       m_buffers(m_num_int_buffers + m_scratch_users.size())
 837 {
 838     GConstFluidModel fg(m_g);
 839
 840     auto create_fluid_agent = [&g](agent_data_t const& agent_data) -> std::unique_ptr<FluidAgent> {
 841         std::unique_ptr<FluidAgent> agent_ptr;
 842         switch (agent_data.kind)
 843         {
 844             case GFluidKernel::Kind::Filter:      agent_ptr.reset(new FluidFilterAgent(g, agent_data.nh));      break;
 845             case GFluidKernel::Kind::Resize:      agent_ptr.reset(new FluidResizeAgent(g, agent_data.nh));      break;
 846             case GFluidKernel::Kind::YUV420toRGB: agent_ptr.reset(new Fluid420toRGBAgent(g, agent_data.nh));    break;
 847             default: GAPI_Assert(false);
 848         }
 849         std::tie(agent_ptr->in_buffer_ids, agent_ptr->out_buffer_ids) = std::tie(agent_data.in_buffer_ids, agent_data.out_buffer_ids);
 850         return agent_ptr;
 851     };
 852
 853     for (auto const& agent_data : traverse_res.m_agents_data){
 854         m_agents.push_back(create_fluid_agent(agent_data));
 855     }
 856
 857     // Actually initialize Fluid buffers
 858     GAPI_LOG_INFO(NULL, "Initializing " << m_num_int_buffers << " fluid buffer(s)" << std::endl);
 859
 860     // After buffers are allocated, repack: ...
 861     for (auto &agent : m_agents)
 862     {
 863         // a. Agent input parameters with View pointers (creating Views btw)
 864         const auto &op = m_gm.metadata(agent->op_handle).get<Op>();
 865         const auto &fu =   fg.metadata(agent->op_handle).get<FluidUnit>();
 866         agent->in_args.resize(op.args.size());
 867         agent->in_views.resize(op.args.size());
 868         for (auto it : ade::util::indexed(ade::util::toRange(agent->in_buffer_ids)))
 869         {
 870             auto in_idx  = ade::util::index(it);
 871             auto buf_idx = ade::util::value(it);
 872
 873             if (buf_idx >= 0)
 874             {
 875                 // IF there is input buffer, register a view (every unique
 876                 // reader has its own), and store it in agent Args
 877                 gapi::fluid::Buffer &buffer = m_buffers.at(m_id_map.at(buf_idx));
 878
 879                 auto inEdge = GModel::getInEdgeByPort(m_g, agent->op_handle, in_idx);
 880                 auto ownStorage = fg.metadata(inEdge).get<FluidUseOwnBorderBuffer>().use;
 881
 882                 // NB: It is safe to keep ptr as view lifetime is buffer lifetime
 883                 agent->in_views[in_idx] = buffer.mkView(fu.border_size, ownStorage);
 884                 agent->in_args[in_idx]  = GArg(&agent->in_views[in_idx]);
 885                 buffer.addView(&agent->in_views[in_idx]);
 886             }
 887             else
 888             {
 889                 // Copy(FIXME!) original args as is
 890                 agent->in_args[in_idx] = op.args[in_idx];
 891             }
 892         }
 893
 894         // b. Agent output parameters with Buffer pointers.
 895         agent->out_buffers.resize(agent->op_handle->outEdges().size(), nullptr);
 896         for (auto it : ade::util::indexed(ade::util::toRange(agent->out_buffer_ids)))
 897         {
 898             auto out_idx = ade::util::index(it);
 899             auto buf_idx = m_id_map.at(ade::util::value(it));
 900             agent->out_buffers.at(out_idx) = &m_buffers.at(buf_idx);
 901         }
 902     }
 903
 904     // After parameters are there, initialize scratch buffers
 905     const std::size_t num_scratch = m_scratch_users.size();
 906     if (num_scratch)
 907     {
 908         GAPI_LOG_INFO(NULL, "Initializing " << num_scratch << " scratch buffer(s)" << std::endl);
 909         std::size_t last_scratch_id = 0;
 910
 911         for (auto i : m_scratch_users)
 912         {
 913             auto &agent = m_agents.at(i);
 914             GAPI_Assert(agent->k.m_scratch);
 915             const std::size_t new_scratch_idx = m_num_int_buffers + last_scratch_id;
 916             agent->out_buffers.emplace_back(&m_buffers[new_scratch_idx]);
 917             last_scratch_id++;
 918         }
 919     }
 920
 921     makeReshape(outputRois);
 922
 923     GAPI_LOG_INFO(NULL, "Internal buffers: " << std::fixed << std::setprecision(2) << static_cast<float>(total_buffers_size())/1024 << " KB\n");
 924 }
 925
 926 std::size_t cv::gimpl::GFluidExecutable::total_buffers_size() const
 927 {
 928     GConstFluidModel fg(m_g);
 929     std::size_t total_size = 0;
 930     for (const auto &i : ade::util::indexed(m_buffers))
 931     {
 932         // Check that all internal and scratch buffers are allocated
 933         const auto  idx = ade::util::index(i);
 934         const auto& b   = ade::util::value(i);
 935         if (idx >= m_num_int_buffers ||
 936             fg.metadata(m_all_gmat_ids.at(idx)).get<FluidData>().internal == true)
 937         {
 938             GAPI_Assert(b.priv().size() > 0);
 939         }
 940
 941         // Buffers which will be bound to real images may have size of 0 at this moment
 942         // (There can be non-zero sized const border buffer allocated in such buffers)
 943         total_size += b.priv().size();
 944     }
 945     return total_size;
 946 }
 947
 948 namespace
 949 {
 950     void resetFluidData(ade::Graph& graph)
 951     {
 952         using namespace cv::gimpl;
 953         GModel::Graph g(graph);
 954         GFluidModel fg(graph);
 955         for (const auto node : g.nodes())
 956         {
 957             if (g.metadata(node).get<NodeType>().t == NodeType::DATA)
 958             {
 959                 auto& fd = fg.metadata(node).get<FluidData>();
 960                 fd.latency         = 0;
 961                 fd.skew            = 0;
 962                 fd.max_consumption = 0;
 963             }
 964
 965             GModel::log_clear(g, node);
 966         }
 967     }
 968
 969     void initFluidUnits(ade::Graph& graph)
 970     {
 971         using namespace cv::gimpl;
 972         GModel::Graph g(graph);
 973         GFluidModel fg(graph);
 974
 975         auto sorted = g.metadata().get<ade::passes::TopologicalSortData>().nodes();
 976         for (auto node : sorted)
 977         {
 978             if (fg.metadata(node).contains<FluidUnit>())
 979             {
 980                 std::set<int> in_hs, out_ws, out_hs;
 981
 982                 for (const auto& in : node->inNodes())
 983                 {
 984                     const auto& d = g.metadata(in).get<Data>();
 985                     if (d.shape == cv::GShape::GMAT)
 986                     {
 987                         const auto& meta = cv::util::get<cv::GMatDesc>(d.meta);
 988                         in_hs.insert(meta.size.height);
 989                     }
 990                 }
 991
 992                 for (const auto& out : node->outNodes())
 993                 {
 994                     const auto& d = g.metadata(out).get<Data>();
 995                     if (d.shape == cv::GShape::GMAT)
 996                     {
 997                         const auto& meta = cv::util::get<cv::GMatDesc>(d.meta);
 998                         out_ws.insert(meta.size.width);
 999                         out_hs.insert(meta.size.height);
1000                     }
1001                 }
1002
1003                 auto &fu = fg.metadata(node).get<FluidUnit>();
1004
1005                 GAPI_Assert((out_ws.size() == 1 && out_hs.size() == 1) &&
1006                             ((in_hs.size() == 1) ||
1007                             ((in_hs.size() == 2) && fu.k.m_kind == cv::GFluidKernel::Kind::YUV420toRGB)));
1008
1009                 const auto &op = g.metadata(node).get<Op>();
1010                 fu.line_consumption.resize(op.args.size(), 0);
1011
1012                 auto in_h  = *in_hs .cbegin();
1013                 auto out_h = *out_hs.cbegin();
1014
1015                 fu.ratio = (double)in_h / out_h;
1016
1017                 // Set line consumption for each image (GMat) input
1018                 for (const auto& in_edge : node->inEdges())
1019                 {
1020                     const auto& d = g.metadata(in_edge->srcNode()).get<Data>();
1021                     if (d.shape == cv::GShape::GMAT)
1022                     {
1023                         auto port = g.metadata(in_edge).get<Input>().port;
1024                         fu.line_consumption[port] = maxLineConsumption(fu.k.m_kind, fu.window, in_h, out_h, fu.k.m_lpi, port);
1025
1026                         GModel::log(g, node, "Line consumption (port " + std::to_string(port) + "): "
1027                                     + std::to_string(fu.line_consumption[port]));
1028                     }
1029                 }
1030
1031                 fu.border_size = borderSize(fu.k.m_kind, fu.window);
1032                 GModel::log(g, node, "Border size: " + std::to_string(fu.border_size));
1033             }
1034         }
1035     }
1036
1037     // FIXME!
1038     // Split into initLineConsumption and initBorderSizes,
1039     // call only consumption related stuff during reshape
1040     void initLineConsumption(ade::Graph& graph)
1041     {
1042         using namespace cv::gimpl;
1043         GModel::Graph g(graph);
1044         GFluidModel fg(graph);
1045
1046         for (const auto &node : g.nodes())
1047         {
1048             if (fg.metadata(node).contains<FluidUnit>())
1049             {
1050                 const auto &fu = fg.metadata(node).get<FluidUnit>();
1051
1052                 for (const auto &in_edge : node->inEdges())
1053                 {
1054                     const auto &in_data_node = in_edge->srcNode();
1055                     auto port = g.metadata(in_edge).get<Input>().port;
1056
1057                     auto &fd = fg.metadata(in_data_node).get<FluidData>();
1058
1059                     // Update (not Set) fields here since a single data node may be
1060                     // accessed by multiple consumers
1061                     fd.max_consumption = std::max(fu.line_consumption[port], fd.max_consumption);
1062                     fd.border_size     = std::max(fu.border_size, fd.border_size);
1063
1064                     GModel::log(g, in_data_node, "Line consumption: " + std::to_string(fd.max_consumption)
1065                                 + " (upd by " + std::to_string(fu.line_consumption[port]) + ")", node);
1066                     GModel::log(g, in_data_node, "Border size: " + std::to_string(fd.border_size), node);
1067                 }
1068             }
1069         }
1070     }
1071
1072     void calcLatency(ade::Graph& graph)
1073     {
1074         using namespace cv::gimpl;
1075         GModel::Graph g(graph);
1076         GFluidModel fg(graph);
1077
1078         auto sorted = g.metadata().get<ade::passes::TopologicalSortData>().nodes();
1079         for (const auto &node : sorted)
1080         {
1081             if (fg.metadata(node).contains<FluidUnit>())
1082             {
1083                 const auto &fu = fg.metadata(node).get<FluidUnit>();
1084
1085                 GModel::log(g, node, "LPI: " + std::to_string(fu.k.m_lpi));
1086
1087                 // Output latency is max(input_latency) + own_latency
1088                 int out_latency = 0;
1089                 for (const auto &in_edge: node->inEdges())
1090                 {
1091                     // FIXME: ASSERT(DATA), ASSERT(FLUIDDATA)
1092                     const auto port = g.metadata(in_edge).get<Input>().port;
1093                     const auto own_latency = fu.line_consumption[port] - fu.border_size;
1094                     const auto in_latency = fg.metadata(in_edge->srcNode()).get<FluidData>().latency;
1095                     out_latency = std::max(out_latency, in_latency + own_latency);
1096                 }
1097
1098                 for (const auto &out_data_node : node->outNodes())
1099                 {
1100                     // FIXME: ASSERT(DATA), ASSERT(FLUIDDATA)
1101                     auto &fd     = fg.metadata(out_data_node).get<FluidData>();
1102                     // If fluid node is external, it will be bound to a real image without
1103                     // fluid buffer allocation, so set its latency to 0 not to confuse later latency propagation.
1104                     // Latency is used in fluid buffer allocation process and is not used by the scheduler
1105                     // so latency doesn't affect the execution and setting it to 0 is legal
1106                     fd.latency   = fd.internal ? out_latency : 0;
1107                     fd.lpi_write = fu.k.m_lpi;
1108                     GModel::log(g, out_data_node, "Latency: " + std::to_string(fd.latency));
1109                 }
1110             }
1111         }
1112     }
1113
1114     void calcSkew(ade::Graph& graph)
1115     {
1116         using namespace cv::gimpl;
1117         GModel::Graph g(graph);
1118         GFluidModel fg(graph);
1119
1120         auto sorted = g.metadata().get<ade::passes::TopologicalSortData>().nodes();
1121         for (const auto &node : sorted)
1122         {
1123             if (fg.metadata(node).contains<FluidUnit>())
1124             {
1125                 int max_latency = 0;
1126                 for (const auto &in_data_node : node->inNodes())
1127                 {
1128                     // FIXME: ASSERT(DATA), ASSERT(FLUIDDATA)
1129                     max_latency = std::max(max_latency, fg.metadata(in_data_node).get<FluidData>().latency);
1130                 }
1131                 for (const auto &in_data_node : node->inNodes())
1132                 {
1133                     // FIXME: ASSERT(DATA), ASSERT(FLUIDDATA)
1134                     auto &fd = fg.metadata(in_data_node).get<FluidData>();
1135
1136                     // Update (not Set) fields here since a single data node may be
1137                     // accessed by multiple consumers
1138                     fd.skew = std::max(fd.skew, max_latency - fd.latency);
1139
1140                     GModel::log(g, in_data_node, "Skew: " + std::to_string(fd.skew), node);
1141                 }
1142             }
1143         }
1144     }
1145 }
1146
1147 void cv::gimpl::GFluidExecutable::makeReshape(const std::vector<cv::Rect> &out_rois)
1148 {
1149     GConstFluidModel fg(m_g);
1150
1151     // Calculate rois for each fluid buffer
1152     std::vector<int> readStarts(m_num_int_buffers);
1153     std::vector<cv::Rect> rois(m_num_int_buffers);
1154     initBufferRois(readStarts, rois, out_rois);
1155
1156     // NB: Allocate ALL buffer object at once, and avoid any further reallocations
1157     // (since raw pointers-to-elements are taken)
1158     for (const auto &it : m_all_gmat_ids)
1159     {
1160         auto id = it.first;
1161         auto nh = it.second;
1162         const auto & d  = m_gm.metadata(nh).get<Data>();
1163         const auto &fd  = fg.metadata(nh).get<FluidData>();
1164         const auto meta = cv::util::get<GMatDesc>(d.meta);
1165
1166         m_buffers[id].priv().init(meta, fd.lpi_write, readStarts[id], rois[id]);
1167
1168         // TODO:
1169         // Introduce Storage::INTERNAL_GRAPH and Storage::INTERNAL_ISLAND?
1170         if (fd.internal == true)
1171         {
1172             // FIXME: do max_consumption calculation properly (e.g. in initLineConsumption)
1173             int max_consumption = 0;
1174             if (nh->outNodes().empty()) {
1175                 // nh is always a DATA node, so it is safe to get inNodes().front() since there's
1176                 // always a single writer (OP node)
1177                 max_consumption = fg.metadata(nh->inNodes().front()).get<FluidUnit>().k.m_lpi;
1178             } else {
1179                 max_consumption = fd.max_consumption;
1180             }
1181             m_buffers[id].priv().allocate(fd.border, fd.border_size, max_consumption, fd.skew);
1182             std::stringstream stream;
1183             m_buffers[id].debug(stream);
1184             GAPI_LOG_INFO(NULL, stream.str());
1185         }
1186     }
1187
1188     // Allocate views, initialize agents
1189     for (auto &agent : m_agents)
1190     {
1191         const auto &fu = fg.metadata(agent->op_handle).get<FluidUnit>();
1192         for (auto it : ade::util::indexed(ade::util::toRange(agent->in_buffer_ids)))
1193         {
1194             auto in_idx  = ade::util::index(it);
1195             auto buf_idx = ade::util::value(it);
1196
1197             if (buf_idx >= 0)
1198             {
1199                 agent->in_views[in_idx].priv().allocate(fu.line_consumption[in_idx], fu.border);
1200             }
1201         }
1202
1203         agent->setRatio(fu.ratio);
1204         agent->m_outputLines = agent->out_buffers.front()->priv().outputLines();
1205     }
1206
1207     // Initialize scratch buffers
1208     if (m_scratch_users.size())
1209     {
1210         for (auto i : m_scratch_users)
1211         {
1212             auto &agent = m_agents.at(i);
1213             GAPI_Assert(agent->k.m_scratch);
1214
1215             // Trigger Scratch buffer initialization method
1216             agent->k.m_is(GModel::collectInputMeta(m_gm, agent->op_handle), agent->in_args, *agent->out_buffers.back());
1217             std::stringstream stream;
1218             agent->out_buffers.back()->debug(stream);
1219             GAPI_LOG_INFO(NULL, stream.str());
1220         }
1221     }
1222
1223     // FIXME: calculate the size (lpi * ..)
1224     m_script.clear();
1225     m_script.reserve(10000);
1226 }
1227
1228 void cv::gimpl::GFluidExecutable::reshape(ade::Graph &g, const GCompileArgs &args)
1229 {
1230     // FIXME: Probably this needs to be integrated into common pass re-run routine
1231     // Backends may want to mark with passes to re-run on reshape and framework could
1232     // do it system-wide (without need in every backend handling reshape() directly).
1233     // This design needs to be analyzed for implementation.
1234     resetFluidData(g);
1235     initFluidUnits(g);
1236     initLineConsumption(g);
1237     calcLatency(g);
1238     calcSkew(g);
1239     const auto out_rois = cv::gapi::getCompileArg<cv::GFluidOutputRois>(args).value_or(cv::GFluidOutputRois());
1240     makeReshape(out_rois.rois);
1241 }
1242
1243 // FIXME: Document what it does
1244 void cv::gimpl::GFluidExecutable::bindInArg(const cv::gimpl::RcDesc &rc, const GRunArg &arg)
1245 {
1246     magazine::bindInArg(m_res, rc, arg);
1247     if (rc.shape == GShape::GMAT) {
1248         auto& mat = m_res.slot<cv::Mat>()[rc.id];
1249         // fluid::Buffer::bindTo() is not connected to magazine::bindIn/OutArg and unbind() calls,
1250         // it's simply called each run() without any requirement to call some fluid-specific
1251         // unbind() at the end of run()
1252         m_buffers[m_id_map.at(rc.id)].priv().bindTo(mat, true);
1253     }
1254 }
1255
1256 void cv::gimpl::GFluidExecutable::bindOutArg(const cv::gimpl::RcDesc &rc, const GRunArgP &arg)
1257 {
1258     // Only GMat is supported as return type
1259     if (rc.shape != GShape::GMAT) {
1260         util::throw_error(std::logic_error("Unsupported return GShape type"));
1261     }
1262     magazine::bindOutArg(m_res, rc, arg);
1263     auto& mat = m_res.slot<cv::Mat>()[rc.id];
1264     m_buffers[m_id_map.at(rc.id)].priv().bindTo(mat, false);
1265 }
1266
1267 void cv::gimpl::GFluidExecutable::packArg(cv::GArg &in_arg, const cv::GArg &op_arg)
1268 {
1269     GAPI_Assert(op_arg.kind != cv::detail::ArgKind::GMAT
1270            && op_arg.kind != cv::detail::ArgKind::GSCALAR
1271            && op_arg.kind != cv::detail::ArgKind::GARRAY
1272            && op_arg.kind != cv::detail::ArgKind::GOPAQUE);
1273
1274     if (op_arg.kind == cv::detail::ArgKind::GOBJREF)
1275     {
1276         const cv::gimpl::RcDesc &ref = op_arg.get<cv::gimpl::RcDesc>();
1277         if (ref.shape == GShape::GSCALAR)
1278         {
1279             in_arg = GArg(m_res.slot<cv::Scalar>()[ref.id]);
1280         }
1281         else if (ref.shape == GShape::GARRAY)
1282         {
1283             in_arg = GArg(m_res.slot<cv::detail::VectorRef>()[ref.id]);
1284         }
1285         else if (ref.shape == GShape::GOPAQUE)
1286         {
1287             in_arg = GArg(m_res.slot<cv::detail::OpaqueRef>()[ref.id]);
1288         }
1289     }
1290 }
1291
// Rvalue-taking entry point: simply forwards to the lvalue-reference overload.
// Inside the body the named parameters are lvalues, so the call below resolves
// to run(std::vector<InObj>&, std::vector<OutObj>&) and does not recurse.
void cv::gimpl::GFluidExecutable::run(std::vector<InObj>  &&input_objs,
                                      std::vector<OutObj> &&output_objs)
{
    run(input_objs, output_objs);
}
1297 void cv::gimpl::GFluidExecutable::run(std::vector<InObj>  &input_objs,
1298                                       std::vector<OutObj> &output_objs)
1299 {
1300     // Bind input buffers from parameters
1301     for (auto& it : input_objs)  bindInArg(it.first, it.second);
1302     for (auto& it : output_objs) bindOutArg(it.first, it.second);
1303
1304     // Reset Buffers and Agents state before we go
1305     for (auto &buffer : m_buffers)
1306         buffer.priv().reset();
1307
1308     for (auto &agent : m_agents)
1309     {
1310         agent->reset();
1311         // Pass input cv::Scalar's to agent argument
1312         const auto& op = m_gm.metadata(agent->op_handle).get<Op>();
1313         for (const auto& it : ade::util::indexed(op.args))
1314         {
1315             const auto& arg = ade::util::value(it);
1316             packArg(agent->in_args[ade::util::index(it)], arg);
1317         }
1318     }
1319
1320     // Explicitly reset Scratch buffers, if any
1321     for (auto scratch_i : m_scratch_users)
1322     {
1323         auto &agent = m_agents[scratch_i];
1324         GAPI_DbgAssert(agent->k.m_scratch);
1325         agent->k.m_rs(*agent->out_buffers.back());
1326     }
1327
1328     // Now start executing our stuff!
1329     // Fluid execution is:
1330     // - run through list of Agents from Left to Right
1331     // - for every Agent:
1332     //   - if all input Buffers have enough data to fulfill
1333     //     Agent's window - trigger Agent
1334     //     - on trigger, Agent takes all input lines from input buffers
1335     //       and produces a single output line
1336     //     - once Agent finishes, input buffers get "readDone()",
1337     //       and output buffers get "writeDone()"
1338     //   - if there's not enough data, Agent is skipped
1339     // Yes, THAT easy!
1340
1341     if (m_script.empty())
1342     {
1343         bool complete = true;
1344         do {
1345             complete = true;
1346             bool work_done=false;
1347             for (auto &agent : m_agents)
1348             {
1349                 // agent->debug(std::cout);
1350                 if (!agent->done())
1351                 {
1352                     if (agent->canWork())
1353                     {
1354                         agent->doWork(); work_done=true;
1355                         m_script.push_back(agent.get());
1356                     }
1357                     if (!agent->done())   complete = false;
1358                 }
1359             }
1360             GAPI_Assert(work_done || complete);
1361         } while (!complete); // FIXME: number of iterations can be calculated statically
1362     }
1363     else
1364     {
1365         for (auto &agent : m_script)
1366         {
1367             agent->doWork();
1368         }
1369     }
1370
1371     // In/Out args clean-up is mandatory now with RMat
1372     for (auto &it : input_objs) magazine::unbind(m_res, it.first);
1373     for (auto &it : output_objs) magazine::unbind(m_res, it.first);
1374 }
1375
1376 cv::gimpl::GParallelFluidExecutable::GParallelFluidExecutable(const ade::Graph                      &g,
1377                                                               const FluidGraphInputData             &graph_data,
1378                                                               const std::vector<GFluidOutputRois>   &parallelOutputRois,
1379                                                               const decltype(parallel_for)          &pfor)
1380 : parallel_for(pfor)
1381 {
1382     for (auto&& rois : parallelOutputRois){
1383         tiles.emplace_back(new GFluidExecutable(g, graph_data, rois.rois));
1384     }
1385 }
1386
1387
// Reshape is not supported for the parallel executable: both parameters are
// ignored and the call always fails on the assertion below.
void cv::gimpl::GParallelFluidExecutable::reshape(ade::Graph&, const GCompileArgs& )
{
    //TODO: implement ?
    GAPI_Assert(false && "Not Implemented;");
}
1393
1394 void cv::gimpl::GParallelFluidExecutable::run(std::vector<InObj>  &&input_objs,
1395                                               std::vector<OutObj> &&output_objs)
1396 {
1397     parallel_for(tiles.size(), [&, this](std::size_t index){
1398         GAPI_Assert((bool)tiles[index]);
1399         tiles[index]->run(input_objs, output_objs);
1400     });
1401 }
1402
1403
1404 // FIXME: these passes operate on graph global level!!!
1405 // Need to fix this for heterogeneous (island-based) processing
1406 void GFluidBackendImpl::addMetaSensitiveBackendPasses(ade::ExecutionEngineSetupContext &ectx)
1407 {
1408     using namespace cv::gimpl;
1409
1410     // FIXME: all passes were moved to "exec" stage since Fluid
1411     // should check Islands configuration first (which is now quite
1412     // limited), and only then continue with all other passes.
1413     //
1414     // The passes/stages API must be streamlined!
1415     ectx.addPass("exec", "init_fluid_data", [](ade::passes::PassContext &ctx)
1416     {
1417         GModel::Graph g(ctx.graph);
1418         if (!GModel::isActive(g, cv::gapi::fluid::backend()))  // FIXME: Rearchitect this!
1419             return;
1420
1421         auto isl_graph = g.metadata().get<IslandModel>().model;
1422         GIslandModel::Graph gim(*isl_graph);
1423
1424         GFluidModel fg(ctx.graph);
1425
1426         const auto setFluidData = [&](ade::NodeHandle nh, bool internal) {
1427             FluidData fd;
1428             fd.internal = internal;
1429             fg.metadata(nh).set(fd);
1430         };
1431
1432         for (const auto& nh : gim.nodes())
1433         {
1434             switch (gim.metadata(nh).get<NodeKind>().k)
1435             {
1436             case NodeKind::ISLAND:
1437             {
1438                 const auto isl = gim.metadata(nh).get<FusedIsland>().object;
1439                 if (isl->backend() == cv::gapi::fluid::backend())
1440                 {
1441                     // Add FluidData to all data nodes inside island,
1442                     // set internal = true if node is not a slot in terms of higher-level GIslandModel
1443                     for (const auto node : isl->contents())
1444                     {
1445                         if (g.metadata(node).get<NodeType>().t == NodeType::DATA &&
1446                             !fg.metadata(node).contains<FluidData>())
1447                             setFluidData(node, true);
1448                     }
1449                 } // if (fluid backend)
1450             } break; // case::ISLAND
1451             case NodeKind::SLOT:
1452             {
1453                 // add FluidData to slot if it's read/written by fluid
1454                 // regardless if it is one fluid island (both writing to and reading from this object)
1455                 // or two distinct islands (both fluid)
1456                 auto isFluidIsland = [&](const ade::NodeHandle& node) {
1457                     // With Streaming, Emitter islands may have no FusedIsland thing in meta.
1458                     // FIXME: Probably this is a concept misalignment
1459                     if (!gim.metadata(node).contains<FusedIsland>()) {
1460                         const auto kind = gim.metadata(node).get<NodeKind>().k;
1461                         GAPI_Assert(kind == NodeKind::EMIT || kind == NodeKind::SINK);
1462                         return false;
1463                     }
1464                     const auto isl = gim.metadata(node).get<FusedIsland>().object;
1465                     return isl->backend() == cv::gapi::fluid::backend();
1466                 };
1467
1468                 if (ade::util::any_of(ade::util::chain(nh->inNodes(), nh->outNodes()), isFluidIsland))
1469                 {
1470                     auto data_node = gim.metadata(nh).get<DataSlot>().original_data_node;
1471                     setFluidData(data_node, false);
1472                 }
1473             } break; // case::SLOT
1474             case NodeKind::EMIT:
1475             case NodeKind::SINK:
1476                 break; // do nothing for Streaming nodes
1477             default: GAPI_Assert(false);
1478             } // switch
1479         } // for (gim.nodes())
1480     });
1481     // FIXME:
1482     // move to unpackKernel method
1483     // when https://gitlab-icv.inn.intel.com/G-API/g-api/merge_requests/66 is merged
1484     ectx.addPass("exec", "init_fluid_unit_windows_and_borders", [](ade::passes::PassContext &ctx)
1485     {
1486         GModel::Graph g(ctx.graph);
1487         if (!GModel::isActive(g, cv::gapi::fluid::backend()))  // FIXME: Rearchitect this!
1488             return;
1489
1490         GFluidModel fg(ctx.graph);
1491
1492         auto sorted = g.metadata().get<ade::passes::TopologicalSortData>().nodes();
1493         for (auto node : sorted)
1494         {
1495             if (fg.metadata(node).contains<FluidUnit>())
1496             {
1497                 // FIXME: check that op has only one data node on input
1498                 auto &fu = fg.metadata(node).get<FluidUnit>();
1499                 const auto &op = g.metadata(node).get<Op>();
1500                 auto inputMeta = GModel::collectInputMeta(fg, node);
1501
1502                 // Trigger user-defined "getWindow" callback
1503                 fu.window = fu.k.m_gw(inputMeta, op.args);
1504
1505                 // Trigger user-defined "getBorder" callback
1506                 fu.border = fu.k.m_b(inputMeta, op.args);
1507             }
1508         }
1509     });
1510     ectx.addPass("exec", "init_fluid_units", [](ade::passes::PassContext &ctx)
1511     {   // Pass body: hands the graph to initFluidUnits() if isActive() is true for the Fluid backend
1512         GModel::Graph g(ctx.graph);
1513         if (!GModel::isActive(g, cv::gapi::fluid::backend()))  // FIXME: Rearchitect this!
1514             return;
1515         // The helper is defined outside this pass; it gets ctx.graph itself, not the typed wrapper g
1516         initFluidUnits(ctx.graph);
1517     });
1518     ectx.addPass("exec", "init_line_consumption", [](ade::passes::PassContext &ctx)
1519     {   // Pass body: hands the graph to initLineConsumption() if isActive() is true for the Fluid backend
1520         GModel::Graph g(ctx.graph);
1521         if (!GModel::isActive(g, cv::gapi::fluid::backend()))  // FIXME: Rearchitect this!
1522             return;
1523         // The helper is defined outside this pass; it gets ctx.graph itself, not the typed wrapper g
1524         initLineConsumption(ctx.graph);
1525     });
1526     ectx.addPass("exec", "calc_latency", [](ade::passes::PassContext &ctx)
1527     {   // Pass body: hands the graph to calcLatency() if isActive() is true for the Fluid backend
1528         GModel::Graph g(ctx.graph);
1529         if (!GModel::isActive(g, cv::gapi::fluid::backend()))  // FIXME: Rearchitect this!
1530             return;
1531         // The helper is defined outside this pass; it gets ctx.graph itself, not the typed wrapper g
1532         calcLatency(ctx.graph);
1533     });
1534     ectx.addPass("exec", "calc_skew", [](ade::passes::PassContext &ctx)
1535     {   // Pass body: hands the graph to calcSkew() if isActive() is true for the Fluid backend
1536         GModel::Graph g(ctx.graph);
1537         if (!GModel::isActive(g, cv::gapi::fluid::backend()))  // FIXME: Rearchitect this!
1538             return;
1539         // The helper is defined outside this pass; it gets ctx.graph itself, not the typed wrapper g
1540         calcSkew(ctx.graph);
1541     });
1542
1543     ectx.addPass("exec", "init_buffer_borders", [](ade::passes::PassContext &ctx)
1544     {   // Pass body: for each FluidData node marked internal, copy a border from one of its readers
1545         GModel::Graph g(ctx.graph);
1546         if (!GModel::isActive(g, cv::gapi::fluid::backend()))  // FIXME: Rearchitect this!
1547             return; // skip the whole pass when isActive() is false for the Fluid backend
1548         // Fluid-typed view of the same ctx.graph, plus the node order kept in TopologicalSortData
1549         GFluidModel fg(ctx.graph);
1550         auto sorted = g.metadata().get<ade::passes::TopologicalSortData>().nodes();
1551         for (auto node : sorted)
1552         {
1553             if (fg.metadata(node).contains<FluidData>()) // nodes without FluidData are ignored
1554             {
1555                 auto &fd = fg.metadata(node).get<FluidData>();
1556
1557                 // Assign border stuff to FluidData
1558
1559                 // In/out data nodes are bound to user data directly,
1560                 // so cannot be extended with a border
1561                 if (fd.internal == true)
1562                 {
1563                     // For now border of the buffer's storage is the border
1564                     // of the first reader whose border size is the same.
1565                     // FIXME: find more clever strategy of border picking
1566                     // (it can be a border which is common for majority of the
1567                     // readers, also we can calculate the number of lines which
1568                     // will be copied by views on each iteration and base our choice
1569                     // on this criterion)
1570                     auto readers = node->outNodes();
1571
1572                     // There can be a situation when __internal__ nodes produced as part of some
1573                     // operation are unused later in the graph:
1574                     //
1575                     // in -> OP1
1576                     //        |------> internal_1  // unused node
1577                     //        |------> internal_2 -> OP2
1578                     //                                |------> out
1579                     //
1580                     // To allow graphs like the one above, skip nodes with empty outNodes()
1581                     if (readers.empty()) {
1582                         continue;
1583                     }
1584                     // First reader that carries FluidUnit and whose border_size equals fd.border_size
1585                     const auto &candidate = ade::util::find_if(readers, [&](ade::NodeHandle nh) {
1586                         return fg.metadata(nh).contains<FluidUnit>() &&
1587                                fg.metadata(nh).get<FluidUnit>().border_size == fd.border_size;
1588                     });
1589                     // A node that has readers but no matching one trips this assertion
1590                     GAPI_Assert(candidate != readers.end());
1591                     // Copy the chosen reader's border into the data node
1592                     const auto &fu = fg.metadata(*candidate).get<FluidUnit>();
1593                     fd.border = fu.border;
1594                 }
1595                 // Log the border type only when fd.border tests true
1596                 if (fd.border)
1597                 {
1598                     GModel::log(g, node, "Border type: " + std::to_string(fd.border->type), node);
1599                 }
1600             }
1601         }
1602     });
1603     ectx.addPass("exec", "init_view_borders", [](ade::passes::PassContext &ctx)
1604     {   // Pass body: tags every FluidData -> FluidUnit edge with FluidUseOwnBorderBuffer{true|false}
1605         GModel::Graph g(ctx.graph);
1606         if (!GModel::isActive(g, cv::gapi::fluid::backend()))  // FIXME: Rearchitect this!
1607             return; // skip the whole pass when isActive() is false for the Fluid backend
1608         // Fluid-typed view of the same ctx.graph; nodes are walked via g.nodes(), not the sorted list
1609         GFluidModel fg(ctx.graph);
1610         for (auto node : g.nodes())
1611         {
1612             if (fg.metadata(node).contains<FluidData>()) // only data nodes own outgoing views
1613             {
1614                 auto &fd = fg.metadata(node).get<FluidData>();
1615                 for (auto out_edge : node->outEdges())
1616                 {
1617                     const auto dstNode = out_edge->dstNode();
1618                     if (fg.metadata(dstNode).contains<FluidUnit>()) // edges to other node kinds stay untagged
1619                     {
1620                         const auto &fu = fg.metadata(dstNode).get<FluidUnit>();
1621                         // A reader whose border_size is 0 also takes the "no own storage" branch.
1622                         // There is no need for own storage for a view if its border is
1623                         // the same as the buffer's (view can have equal or smaller border
1624                         // size in this case)
1625                         if (fu.border_size == 0 ||
1626                                 (fu.border && fd.border && (*fu.border == *fd.border)))
1627                         {
1628                             GAPI_Assert(fu.border_size <= fd.border_size); // reader's border must fit the buffer's
1629                             fg.metadata(out_edge).set(FluidUseOwnBorderBuffer{false});
1630                         }
1631                         else
1632                         {
1633                             fg.metadata(out_edge).set(FluidUseOwnBorderBuffer{true});
1634                             GModel::log(g, out_edge, "OwnBufferStorage: true"); // only the 'true' case is logged
1635                         }
1636                     }
1637                 }
1638             }
1639         }
1640     });
1641 }