struct DataLayer : public Layer
{
void finalize(const std::vector<Mat*>&, std::vector<Mat>&) CV_OVERRIDE {}
- void forward(std::vector<Mat*>&, std::vector<Mat>&, std::vector<Mat> &) CV_OVERRIDE {}
- void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE {}
+
+ /**
+  * Entry point for running the input layer on generic array arguments.
+  *
+  * Emits trace information, offers the call to forward_ocl() through
+  * CV_OCL_RUN when the preferable target is an OpenCL one, and then
+  * delegates to Layer::forward_fallback().
+  */
+ void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs,
+              OutputArrayOfArrays internals) CV_OVERRIDE
+ {
+     CV_TRACE_FUNCTION();
+     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+     // OpenCL path is attempted first, guarded by the target check.
+     CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs, outputs, internals));
+
+     // Generic path shared with the base class.
+     Layer::forward_fallback(inputs, outputs, internals);
+ }
+
+ /**
+  * CPU forward pass of the network input layer.
+  *
+  * For every stored input blob, converts it with convertFp16() into the
+  * output at the same index, but only when the input is CV_32F and the
+  * output is CV_16S. Any other type combination is left untouched here.
+  *
+  * @param outputs output blobs; must hold at least inputsData.size() elements.
+  */
+ void forward(std::vector<Mat*>&, std::vector<Mat>& outputs, std::vector<Mat> &) CV_OVERRIDE
+ {
+     // outputs is indexed with the same index as inputsData below, so reject
+     // a shorter outputs vector instead of reading past its end.
+     CV_Assert(inputsData.size() <= outputs.size());
+
+     for (size_t i = 0; i < inputsData.size(); ++i)
+     {
+         if (inputsData[i].type() == CV_32F && outputs[i].type() == CV_16S)
+         {
+             convertFp16(inputsData[i], outputs[i]);
+         }
+     }
+ }
+
+#ifdef HAVE_OPENCL
+ /**
+  * OpenCL forward pass of the network input layer.
+  *
+  * When the outputs report a CV_16S depth, every stored input blob is passed
+  * through convertFp16() into the UMat at the same index of the outputs.
+  * For any other depth nothing is converted.
+  *
+  * @param outputs_ output blobs, read as a vector of UMat.
+  * @return always true.
+  */
+ bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays)
+ {
+     if (outputs_.depth() == CV_16S)
+     {
+         std::vector<UMat> outputs;
+         outputs_.getUMatVector(outputs);
+
+         // outputs is indexed with the same index as inputsData below, so
+         // reject a shorter outputs vector instead of reading past its end.
+         CV_Assert(inputsData.size() <= outputs.size());
+
+         for (size_t i = 0; i < inputsData.size(); ++i)
+         {
+             convertFp16(inputsData[i], outputs[i]);
+         }
+     }
+     return true;
+ }
+#endif
int outputNameToIndex(const String& tgtName) CV_OVERRIDE
{
}
std::vector<String> outNames;
+ std::vector<Mat> inputsData;
};
struct BlobManager
poolingLayer->computeMaxIdx = true;
}
}
- it = layers.find(0);
- CV_Assert(it != layers.end());
- it->second.skip = true;
layersTimings.clear();
}
allocateLayer(*i, layersShapes);
//bind inputs
- ld.inputBlobs.resize(ninputs);
- ld.inputBlobsWrappers.resize(ninputs);
- for (size_t i = 0; i < ninputs; i++)
+ if (ld.id == 0) // DataLayer
+ {
+ ninputs = netInputLayer->inputsData.size();
+ ld.inputBlobsWrappers.resize(ninputs);
+ for (size_t i = 0; i < ninputs; i++)
+ {
+ ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
+ }
+ }
+ else
{
- LayerPin from = ld.inputBlobsId[i];
- CV_Assert(from.valid());
- CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
- ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
- ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
+ ld.inputBlobs.resize(ninputs);
+ ld.inputBlobsWrappers.resize(ninputs);
+ for (size_t i = 0; i < ninputs; i++)
+ {
+ LayerPin from = ld.inputBlobsId[i];
+ CV_Assert(from.valid());
+ CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
+ ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
+ ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
+ }
}
LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
// TODO: OpenCL target support more fusion styles.
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
- ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling")) )
+ ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
+ ld.layerInstance->type != "Concat")) )
continue;
Ptr<Layer>& currLayer = ld.layerInstance;
ld.outputBlobs.size() == 1 )
{
Mat& output = ld.outputBlobs[0];
+ UMat umat_output;
+ if (!ld.outputBlobsWrappers.empty() &&
+ (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
+ {
+ size_t i, ninputs = ld.inputBlobsId.size();
+ bool conv_layer = true;
+ for( i = 0; i < ninputs; i++ )
+ {
+ LayerPin pin = ld.inputBlobsId[i];
+ LayerData* inp_i_data = &layers[pin.lid];
+ while(inp_i_data->skip &&
+ inp_i_data->inputBlobsId.size() == 1 &&
+ inp_i_data->consumers.size() == 1)
+ {
+ pin = inp_i_data->inputBlobsId[0];
+ inp_i_data = &layers[pin.lid];
+ }
+ conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
+ }
+ if (!conv_layer)
+ continue;
+ std::vector<UMat> umat_outputBlobs;
+ umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+ umat_output = umat_outputBlobs[0];
+ }
// TODO: in general, this optimization can always be done, but
// many layers currently check that the input/output blobs are
// Allocate new memory to prevent collisions during memory
// reusing (see https://github.com/opencv/opencv/pull/10456).
output = output.clone();
+ if (preferableBackend == DNN_BACKEND_OPENCV &&
+ IS_DNN_OPENCL_TARGET(preferableTarget))
+ {
+ std::vector<UMat> umats(1);
+ umat_output = umat_output.clone();
+ umats[0] = umat_output;
+ OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
+ }
Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
int ofs = 0;
for( i = 0; i < ninputs; i++ )
CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
Mat* oldPtr = &curr_output;
curr_output = output_slice;
+ if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
+ {
+ std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
+ umats[pin.oid] = umat_output(chrange);
+ OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
+ }
// Layers that refer old input Mat will refer to the
// new data but the same Mat object.
CV_Assert(curr_output.data == output_slice.data, oldPtr == &curr_output);
ShapesVec inputShapes;
for(int i = 0; i < layers[0].outputBlobs.size(); i++)
{
- CV_Assert(layers[0].outputBlobs[i].total());
- if (layers[0].outputBlobs[i].depth() == CV_32F &&
- preferableBackend == DNN_BACKEND_OPENCV &&
+ Mat& inp = layers[0].outputBlobs[i];
+ CV_Assert(inp.total());
+ if (preferableBackend == DNN_BACKEND_OPENCV &&
preferableTarget == DNN_TARGET_OPENCL_FP16)
{
- Mat mat = layers[0].outputBlobs[i].clone();
- convertFp16(mat, layers[0].outputBlobs[i]);
+ layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
}
- inputShapes.push_back(shape(layers[0].outputBlobs[i]));
+ inputShapes.push_back(shape(inp));
}
LayersShapesMap layersShapes;
getLayersShapes(inputShapes, layersShapes);
CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");
LayerData &ld = impl->layers[pin.lid];
- ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
- ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
- MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
- Mat blob_;
- if (impl->preferableBackend == DNN_BACKEND_OPENCV &&
- impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
- {
- Mat blob_mat = blob.getMat();
- convertFp16(blob_mat, blob_);
- }
- else
- {
- blob_ = blob.getMat();
- }
+ const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size());
+ ld.outputBlobs.resize(numInputs);
+ ld.outputBlobsWrappers.resize(numInputs);
+ impl->netInputLayer->inputsData.resize(numInputs);
+
+ MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
+ Mat blob_ = blob.getMat();
bool oldShape = prevShape == shape(blob_);
if (oldShape)
{
- blob_.copyTo(ld.outputBlobs[pin.oid]);
+ blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
}
else
{
ld.outputBlobs[pin.oid] = blob_.clone();
+ impl->netInputLayer->inputsData[pin.oid] = ld.outputBlobs[pin.oid];
}
if (!ld.outputBlobsWrappers[pin.oid].empty())
model + (config.empty() ? "" : ", " + config));
}
+ /**
+  * Builds a network from in-memory buffers, choosing the importer by name.
+  *
+  * The framework name is compared case-insensitively (it is lower-cased
+  * first). Note the argument order each importer is called with: Caffe and
+  * Darknet receive (config, model), TensorFlow receives (model, config).
+  *
+  * @param _framework   name of the origin framework.
+  * @param bufferModel  buffer with the model data.
+  * @param bufferConfig buffer with the configuration data.
+  * @return the network produced by the selected importer.
+  *
+  * Raises Error::StsNotImplemented for "torch" and "dldt", and
+  * Error::StsError for any other unrecognized name.
+  */
+ Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
+             const std::vector<uchar>& bufferConfig)
+ {
+     const String framework = _framework.toLowerCase();
+
+     if (framework == "caffe")
+     {
+         return readNetFromCaffe(bufferConfig, bufferModel);
+     }
+     if (framework == "tensorflow")
+     {
+         return readNetFromTensorflow(bufferModel, bufferConfig);
+     }
+     if (framework == "darknet")
+     {
+         return readNetFromDarknet(bufferConfig, bufferModel);
+     }
+
+     // Known frameworks that cannot be loaded from memory buffers.
+     if (framework == "torch")
+     {
+         CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
+     }
+     if (framework == "dldt")
+     {
+         CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers");
+     }
+
+     CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
+ }
+
Net readNetFromModelOptimizer(const String &xml, const String &bin)
{
return Net::readFromModelOptimizer(xml, bin);