*/
virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) = 0;
+ /** @brief Given the @p inputs blobs, computes the @p outputs blobs.
+ * @param[in] inputs the input blobs.
+ * @param[out] outputs allocated output blobs, which will store results of the computation.
+ * @param[out] internals allocated internal blobs
+ */
+ virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) = 0;
+
+ /** @brief Fallback used by array-based forward() implementations.
+ *
+ * Fetches Mat vectors from @p inputs, @p outputs and @p internals and passes them to the
+ * Mat-pointer based forward() overload of this layer.
+ * @param[in] inputs the input blobs.
+ * @param[out] outputs allocated output blobs, which will store results of the computation.
+ * @param[out] internals allocated internal blobs
+ */
+ void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);
+
/** @brief @overload */
CV_WRAP void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);
/** @brief @overload */
CV_WRAP std::vector<Mat> finalize(const std::vector<Mat> &inputs);
- /** @brief @overload */
- CV_WRAP void forward(const std::vector<Mat> &inputs, CV_IN_OUT std::vector<Mat> &outputs,
- CV_IN_OUT std::vector<Mat> &internals);
-
/** @brief Allocates layer and computes output. */
CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs,
CV_IN_OUT std::vector<Mat> &internals);
return shape(mat.size.p, mat.dims);
}
+static inline MatShape shape(const UMat& mat) // builds a MatShape describing the dimensions of a UMat
+{
+ return shape(mat.size.p, mat.dims); // delegates to the overload taking the size array and the dimension count
+}
+
namespace {inline bool is_neg(int i) { return i < 0; }}
static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1)
return 0;
int elems = 1;
- CV_Assert(start < (int)shape.size() && end <= (int)shape.size() &&
+ CV_Assert(start <= (int)shape.size() && end <= (int)shape.size() &&
start <= end);
for(int i = start; i < end; i++)
{
std::vector<Mat> outputBlobs;
std::vector<Mat*> inputBlobs;
std::vector<Mat> internals;
+ std::vector<UMat> umat_outputBlobs;
+ std::vector<UMat> umat_inputBlobs;
+ std::vector<UMat> umat_internals;
// Computation nodes of implemented backends (except DEFAULT).
std::map<int, Ptr<BackendNode> > backendNodes;
// Flag for skip layer computation for specific backend.
{
void finalize(const std::vector<Mat*>&, std::vector<Mat>&) {}
void forward(std::vector<Mat*>&, std::vector<Mat>&, std::vector<Mat> &) {}
+ void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) {} // no-op, same as the Mat-based overload of this layer
int outputNameToIndex(String tgtName)
{
}
}
+ void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force) // UMat variant: allocates umat_dst (CV_32F, given shape) for pin lp and records it either as a reuse of a free blob or as a new host blob
+ {
+ UMat bestBlob;
+ LayerPin bestBlobPin;
+
+ if( !force ) // when force is set the search for a reusable blob is skipped entirely
+ {
+ std::map<LayerPin, UMat>::iterator hostIt;
+ std::map<LayerPin, int>::iterator refIt;
+
+ const int targetTotal = total(shape);
+ int bestBlobTotal = INT_MAX;
+
+ for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt) // scan every registered host blob
+ {
+ refIt = refCounter.find(hostIt->first);
+ // Use only blobs that had references before because if not,
+ // it might be used as output.
+ if (refIt != refCounter.end() && refIt->second == 0)
+ {
+ UMat& unusedBlob = hostIt->second;
+ if (unusedBlob.total() >= targetTotal &&
+ unusedBlob.total() < bestBlobTotal) // keep the smallest unused blob that is still large enough
+ {
+ bestBlobPin = hostIt->first;
+ bestBlob = unusedBlob;
+ bestBlobTotal = unusedBlob.total();
+ }
+ }
+ }
+ }
+ if (!bestBlob.empty())
+ {
+ reuse(bestBlobPin, lp);
+ umat_dst.create(shape, CV_32F); // NOTE(review): only the pin bookkeeping is reused here; umat_dst is still created rather than aliased to bestBlob - confirm this is intended
+ }
+ else
+ {
+ // if dst has already been allocated with total(shape) elements,
+ // it won't be recreated and the pointer of dst.data remains the same.
+ umat_dst.create(shape, CV_32F);
+ addHost(lp, umat_dst); // no candidate found: register umat_dst as a new host blob for lp
+ }
+ }
+
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
std::vector<LayerPin>& pinsForInternalBlobs,
bool maximizeReuse)
{
CV_TRACE_FUNCTION();
+ bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
+ preferableTarget == DNN_TARGET_OPENCL);
pinsForInternalBlobs.clear();
std::vector<Mat>& outputBlobs = ld.outputBlobs,
&internalBlobs = ld.internals;
+ std::vector<UMat>& umat_outputBlobs = ld.umat_outputBlobs,
+ &umat_internalBlobs = ld.umat_internals;
+
const ShapesVec& outShapes = layerShapes.out,
internalShapes = layerShapes.internal;
outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob
internalBlobs.resize(internalShapes.size());
+ if (use_umat)
+ {
+ umat_outputBlobs.resize(std::max((size_t)1, outShapes.size()));
+ umat_internalBlobs.resize(internalShapes.size());
+ }
CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
ShapesVec shapes(outShapes);
shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
std::vector<Mat*> blobs;
+ std::vector<UMat*> umat_blobs;
for(int i = 0; i < outputBlobs.size(); i++)
{
blobs.push_back(&outputBlobs[i]);
+ if (use_umat)
+ umat_blobs.push_back(&umat_outputBlobs[i]);
}
for(int i = 0; i < internalBlobs.size(); i++)
{
blobs.push_back(&internalBlobs[i]);
+ if (use_umat)
+ umat_blobs.push_back(&umat_internalBlobs[i]);
if (total(internalShapes[i]))
{
pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
LayerPin blobPin(ld.id, index);
if (index < outShapes.size() && inPlace && !force)
{
- CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
- ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
+ if (use_umat)
+ {
+ CV_Assert(ld.umat_inputBlobs[0].total() == total(shapes[index]));
+ ld.umat_outputBlobs[index] =
+ ld.umat_inputBlobs[0].reshape(1, shapes[index].size(),
+ &shapes[index][0]);
+ }
+ else
+ {
+ CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
+ ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
+ }
reuse(ld.inputBlobsId[0], blobPin);
}
else
{
- reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
+ if (use_umat)
+ reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
+ else
+ reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
}
}
}
refCounter.clear();
reuseMap.clear();
memHosts.clear();
+ umat_memHosts.clear();
+ preferableTarget = DNN_TARGET_CPU;
+ preferableBackend = DNN_BACKEND_DEFAULT;
+ }
+
+ void setPreferableTarget(int targetId) // stores the target id; it is compared against DNN_TARGET_OPENCL when deciding whether UMat blobs are allocated
+ {
+ preferableTarget = targetId;
+ }
+
+ void setPreferableBackend(int backendId) // stores the backend id; it is compared against DNN_BACKEND_DEFAULT when deciding whether UMat blobs are allocated
+ {
+ preferableBackend = backendId;
}
private:
memHosts[lp] = mat;
}
+ void addHost(const LayerPin& lp, const UMat& umat) // UMat variant: registers umat as the origin blob for pin lp
+ {
+ CV_Assert(umat_memHosts.find(lp) == umat_memHosts.end()); // lp must not already be registered as a UMat host
+ reuseMap[lp] = lp; // origin blobs map to themselves in reuseMap
+ umat_memHosts[lp] = umat;
+ }
+
std::map<LayerPin, int> refCounter;
// Maps pin to origin blob (for whom memory was allocated firstly).
// For origin blobs key == value.
std::map<LayerPin, LayerPin> reuseMap;
std::map<LayerPin, Mat> memHosts;
+ std::map<LayerPin, UMat> umat_memHosts;
+ int preferableTarget;
+ int preferableBackend;
};
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, const cv::Mat& m)
it->second.inputBlobs.clear();
it->second.outputBlobs.clear();
it->second.internals.clear();
+ it->second.umat_inputBlobs.clear();
+ it->second.umat_outputBlobs.clear();
+ it->second.umat_internals.clear();
}
it->second.skipFlags.clear();
//it->second.consumers.clear();
allocateLayer(*i, layersShapes);
//bind inputs
+ bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
+ preferableTarget == DNN_TARGET_OPENCL);
ld.inputBlobs.resize(ninputs);
+ if (use_umat)
+ ld.umat_inputBlobs.resize(ninputs);
ld.inputBlobsWrappers.resize(ninputs);
for (size_t i = 0; i < ninputs; i++)
{
CV_Assert(from.valid());
CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
+ if (use_umat)
+ ld.umat_inputBlobs[i] = layers[from.lid].umat_outputBlobs[from.oid];
ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
}
Ptr<Layer> layerPtr = ld.getLayerInstance();
{
- layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
+ if (use_umat)
+ {
+     // finalize() works on Mat blobs, so every UMat blob is accessed through a Mat
+     // obtained with getMat(). Each input needs its own Mat object: taking the
+     // address of one reused temporary would make every inputs[i] point at the
+     // same Mat, i.e. at the last input blob only.
+     std::vector<Mat> inputMats(ld.umat_inputBlobs.size());
+     std::vector<Mat*> inputs(inputMats.size());
+     std::vector<Mat> outputs(ld.umat_outputBlobs.size());
+     for (size_t i = 0; i < inputMats.size(); i++)
+     {
+         inputMats[i] = ld.umat_inputBlobs[i].getMat(ACCESS_READ);
+         inputs[i] = &inputMats[i];
+     }
+     for (size_t i = 0; i < outputs.size(); i++)
+     {
+         outputs[i] = ld.umat_outputBlobs[i].getMat(ACCESS_READ);
+     }
+     layerPtr->finalize(inputs, outputs);
+ }
+ else
+ {
+     // Mat path: the layer blobs are already in the form finalize() expects.
+     layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
+ }
layerPtr->preferableTarget = preferableTarget;
#if 0
std::cout << "\toutputs:";
getLayersShapes(inputShapes, layersShapes);
blobManager.reset();
+ blobManager.setPreferableTarget(preferableTarget);
+ blobManager.setPreferableBackend(preferableBackend);
backendWrappers.clear();
blobManager.addReference(LayerPin(0, 0));
for (it = layers.begin(); it != layers.end(); ++it)
if (!ld.inputBlobsWrappers[i].empty())
ld.inputBlobsWrappers[i]->copyToHost();
}
- layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
+ if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
+ layer->forward(ld.umat_inputBlobs, ld.umat_outputBlobs, ld.umat_internals);
+ else
+ layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
{
if (!ld.outputBlobsWrappers[i].empty())
{
CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
}
+
+ if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
+ ld.umat_outputBlobs[pin.oid].copyTo(ld.outputBlobs[pin.oid]);
+
return ld.outputBlobs[pin.oid];
}
LayerPin pin = impl->getPinByAlias(layerName);
LayerData &ld = impl->layers[pin.lid];
+
+ if (ld.umat_outputBlobs.size() > 0)
+ {
+ for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
+ ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
+ }
+
outputBlobs = ld.outputBlobs;
}
if( impl->preferableBackend != backendId )
{
impl->preferableBackend = backendId;
+ impl->blobManager.setPreferableBackend(backendId);
impl->netWasAllocated = false;
impl->clear();
}
if( impl->preferableTarget != targetId )
{
impl->preferableTarget = targetId;
+ impl->blobManager.setPreferableTarget(targetId);
impl->netWasAllocated = false;
impl->clear();
}
LayerData &ld = impl->layers[pin.lid];
ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
+ bool use_umat = (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
+ impl->preferableTarget == DNN_TARGET_OPENCL);
+ if (use_umat)
+ ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
bool oldShape = prevShape == shape(blob_);
if (oldShape)
+ {
blob_.copyTo(ld.outputBlobs[pin.oid]);
+ if (use_umat)
+ blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
+ }
else
+ {
ld.outputBlobs[pin.oid] = blob_.clone();
+ if (use_umat)
+ blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
+ }
if (!ld.outputBlobsWrappers[pin.oid].empty())
{
return outputs;
}
-void Layer::forward(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // adapts array arguments to the Mat-pointer based forward() overload
{
CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
- std::vector<Mat*> inputsp;
- vecToPVec(inputs, inputsp);
- this->forward(inputsp, outputs, internals);
+ std::vector<Mat> inpvec;
+ std::vector<Mat> outputs;
+ std::vector<Mat> internals;
+
+ inputs_arr.getMatVector(inpvec); // fetch Mat vectors for all three blob groups
+ outputs_arr.getMatVector(outputs);
+ internals_arr.getMatVector(internals);
+
+ std::vector<Mat*> inputs(inpvec.size()); // the Mat-based overload takes its inputs as pointers
+ for (int i = 0; i < inpvec.size(); i++)
+ inputs[i] = &inpvec[i];
+
+ this->forward(inputs, outputs, internals);
}
void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
backendId == DNN_BACKEND_HALIDE && haveHalide();
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point; this layer has no OpenCL branch here
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // always handled by the Mat-based forward() through the fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
return true;
}
+#ifdef HAVE_OPENCL
+ bool forward_ocl(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) // OpenCL path of this layer: performs no work
+ {
+ return true; // reports success unconditionally without touching any blob
+ }
+#endif
+
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point: OpenCL attempt first, Mat fallback second
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+ CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && // OpenCL is attempted only when the OpenCL target is selected
+ OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // presumably reached only when CV_OCL_RUN did not return early - confirm against the macro definition
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
};
#ifdef HAVE_OPENCL
- bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
- CV_TRACE_FUNCTION();
- CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
- int cAxis = clamp(axis, inputs[0]->dims);
+ inps.getUMatVector(inputs);
+ outs.getUMatVector(outputs);
+
+ int cAxis = clamp(axis, inputs[0].dims);
if (!(cAxis == 1 && outputs[0].dims == 4 && !padding))
return false;
int bottom_concat_axis;
- int concat_size = inputs[0]->size[2] * inputs[0]->size[3];
+ int concat_size = inputs[0].size[2] * inputs[0].size[3];
int top_concat_axis = outputs[0].size[1];
int offset_concat_axis = 0;
- UMat inpMat, outMat;
- outMat = outputs[0].getUMat(ACCESS_WRITE);
-
- ocl::Kernel kernel;
- String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0]->type()) + String(" ");
- if (!kernel.create("concat", ocl::dnn::concat_oclsrc, buildopt))
- return false;
+ UMat& outMat = outputs[0];
+ String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" ");
for (size_t i = 0; i < inputs.size(); i++)
{
- inpMat = inputs[i]->getUMat(ACCESS_READ);
- bottom_concat_axis = inputs[i]->size[1];
- size_t nthreads = inputs[i]->total();
+ ocl::Kernel kernel("concat", ocl::dnn::concat_oclsrc, buildopt);
+ if (kernel.empty())
+ return false;
+
+ UMat& inpMat = inputs[i];
+ bottom_concat_axis = inputs[i].size[1];
+ size_t nthreads = inputs[i].total();
kernel.set(0, (int)nthreads);
kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
- kernel.set(2, (int)inputs[i]->size[0]);
+ kernel.set(2, (int)inputs[i].size[0]);
kernel.set(3, (int)concat_size);
kernel.set(4, (int)top_concat_axis);
kernel.set(5, (int)bottom_concat_axis);
}
#endif
- void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point: OpenCL attempt first, Mat fallback second
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && // OpenCL is attempted only when the OpenCL target is selected
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
- forward_ocl(inputs, outputs, internals))
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // presumably reached only when CV_OCL_RUN did not return early - confirm against the macro definition
+ }
+
+ void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
int cAxis = clamp(axis, inputs[0]->dims);
Mat& outMat = outputs[0];
};
#ifdef HAVE_OPENCL
- bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
- int group = inputs[0]->size[1] / umat_blobs[0].size[1];
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
+
+ inps.getUMatVector(inputs);
+ outs.getUMatVector(outputs);
+
+ int group = inputs[0].size[1] / umat_blobs[0].size[1];
if (convolutionOp.empty())
{
OCL4DNNConvConfig config;
- config.in_shape = shape(*inputs[0]);
+ config.in_shape = shape(inputs[0]);
config.out_shape = shape(outputs[0]);
config.kernel = kernel;
config.pad = pad;
convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
}
+ int k, outCn = umat_blobs[0].size[0];
+ if( weightsMat.empty() )
+ {
+ // prepare weightsMat where each row is aligned and has enough zero padding on the right to
+ // use vectorized (i.e. with intrinsics) loops without tail processing
+ Mat wm = blobs[0].reshape(1, outCn).clone();
+ if( wm.step1() % VEC_ALIGN != 0 )
+ {
+ int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
+ Mat wm_buffer = Mat(outCn, newcols, wm.type());
+ Mat wm_padding = wm_buffer.colRange(wm.cols, newcols);
+ wm_padding.setTo(Scalar::all(0.));
+ Mat wm_aligned = wm_buffer.colRange(0, wm.cols);
+ wm.copyTo(wm_aligned);
+ wm = wm_aligned;
+ }
+ weightsMat = wm;
+
+ Mat biasMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat();
+ biasvec.resize(outCn+2);
+ if( biasMat.empty() )
+ {
+ for( k = 0; k < outCn; k++ )
+ biasvec[k] = 0.f;
+ }
+ else
+ {
+ for( k = 0; k < outCn; k++ )
+ biasvec[k] = biasMat.at<float>(k);
+ }
+
+ if( !bnorm.empty() || !scaleLayer.empty() )
+ {
+ Mat scale, shift, scale2, shift2;
+ const float *scaleptr = 0, *shiftptr = 0;
+ const float *scaleptr2 = 0, *shiftptr2 = 0;
+
+ if( !bnorm.empty() )
+ {
+ bnorm->getScaleShift(scale, shift);
+ CV_Assert( scale.isContinuous() && shift.isContinuous() &&
+ scale.type() == CV_32F && shift.type() == CV_32F &&
+ scale.total() == (size_t)outCn &&
+ shift.total() == (size_t)outCn );
+ scaleptr = scale.ptr<float>();
+ shiftptr = shift.ptr<float>();
+ }
+ if( !scaleLayer.empty() )
+ {
+ scale2 = scaleLayer->blobs[0];
+ CV_Assert( scale2.isContinuous() && scale2.type() == CV_32F &&
+ scale2.total() == (size_t)outCn );
+ scaleptr2 = scale2.ptr<float>();
+ if( scaleLayer->hasBias )
+ {
+ shift2 = scaleLayer->blobs[1];
+ CV_Assert( shift2.isContinuous() && shift2.type() == CV_32F &&
+ shift2.total() == (size_t)outCn );
+ shiftptr2 = shift2.ptr<float>();
+ }
+ }
+
+ if (shiftptr || shiftptr2)
+ fusedBias = true;
+
+ for( int i = 0; i < outCn; i++ )
+ {
+ float s1 = scaleptr ? scaleptr[i] : 1.f;
+ float delta1 = shiftptr ? shiftptr[i] : 0.f;
+ float s2 = scaleptr2 ? scaleptr2[i] : 1.f;
+ float delta2 = shiftptr2 ? shiftptr2[i] : 0.f;
+ float* w_i = weightsMat.ptr<float>(i);
+ int j, wcols = weightsMat.cols;
+
+ for( j = 0; j < wcols; j++ )
+ w_i[j] *= (s1*s2);
+
+ biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2);
+ }
+ }
+ biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
+ }
+
+ reluslope.clear();
+ if( activ )
+ {
+ Ptr<ReLULayer> activ_relu = activ.dynamicCast<ReLULayer>();
+ if( !activ_relu.empty() )
+ {
+ reluslope.assign(outCn+2, activ_relu->negativeSlope);
+ activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
+ }
+
+ Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
+ if( !activ_chprelu.empty() )
+ {
+ const Mat& m = activ_chprelu->blobs[0];
+ CV_Assert(m.isContinuous() && m.type() == CV_32F && (int)m.total() == outCn);
+ const float* mdata = m.ptr<float>();
+ reluslope.resize(outCn+2);
+ std::copy(mdata, mdata + outCn, reluslope.begin());
+ reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
+ activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
+ }
+ }
+
if ( newWeightAndBias )
{
weightsMat.copyTo(umat_blobs[0]);
newActiv = false;
}
- UMat inpMat, outMat;
- inpMat = inputs[0]->getUMat(ACCESS_READ);
- outMat = outputs[0].getUMat(ACCESS_WRITE);
+ UMat& inpMat = inputs[0];
+ UMat& outMat = outputs[0];
int batch_size = inpMat.size[0];
return convolutionOp->Forward(inpMat,
}
#endif
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point: OpenCL attempt first, Mat fallback second
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+ CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && // OpenCL is attempted only when the OpenCL target is selected
+ OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // presumably reached only when CV_OCL_RUN did not return early - confirm against the macro definition
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
}
}
-#ifdef HAVE_OPENCL
- if (shiftptr || shiftptr2)
- fusedBias = true;
-#endif
-
for( int i = 0; i < outCn; i++ )
{
float s1 = scaleptr ? scaleptr[i] : 1.f;
if( !activ_relu.empty() )
{
reluslope.assign(outCn+2, activ_relu->negativeSlope);
-#ifdef HAVE_OPENCL
- activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
-#endif
}
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
reluslope.resize(outCn+2);
std::copy(mdata, mdata + outCn, reluslope.begin());
reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
-#ifdef HAVE_OPENCL
- activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
-#endif
}
}
- CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
- OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
- forward_ocl(inputs, outputs, internals))
-
int nstripes = std::max(getNumThreads(), 1);
ParallelConv::run(*inputs[0], outputs[0], weightsMat, biasvec, reluslope,
}
};
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point; this layer has no OpenCL branch here
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // always handled by the Mat-based forward() through the fallback
+ }
+
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
}
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point; this layer has no OpenCL branch here
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // always handled by the Mat-based forward() through the fallback
+ }
+
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
return false;
}
+#ifdef HAVE_OPENCL
+ bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // OpenCL-target path; the detection logic below still reads its inputs through Mat headers
+ {
+ std::vector<Mat> inpvec;
+ std::vector<Mat> outputs;
+
+ inputs_arr.getMatVector(inpvec);
+ outputs_arr.getMatVector(outputs);
+
+ std::vector<Mat*> inputs(inpvec.size()); // pointer view over inpvec, used by the code below
+ for (size_t i = 0; i < inpvec.size(); i++)
+ inputs[i] = &inpvec[i];
+
+ std::vector<LabelBBox> allDecodedBBoxes;
+ std::vector<std::vector<std::vector<float> > > allConfidenceScores;
+
+ int num = inputs[0]->size[0]; // first dimension of input 0 - presumably the batch size, confirm
+
+ // extract predictions from input layers
+ {
+ int numPriors = inputs[2]->size[2] / 4;
+
+ const float* locationData = inputs[0]->ptr<float>(); // input 0: locations, input 1: confidences, input 2: priors
+ const float* confidenceData = inputs[1]->ptr<float>();
+ const float* priorData = inputs[2]->ptr<float>();
+
+ // Retrieve all location predictions
+ std::vector<LabelBBox> allLocationPredictions;
+ GetLocPredictions(locationData, num, numPriors, _numLocClasses,
+ _shareLocation, _locPredTransposed, allLocationPredictions);
+
+ // Retrieve all confidences
+ GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
+
+ // Retrieve all prior bboxes
+ std::vector<caffe::NormalizedBBox> priorBBoxes;
+ std::vector<std::vector<float> > priorVariances;
+ GetPriorBBoxes(priorData, numPriors, priorBBoxes, priorVariances);
+
+ // Decode all loc predictions to bboxes
+ DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, num,
+ _shareLocation, _numLocClasses, _backgroundLabelId,
+ _codeType, _varianceEncodedInTarget, false, allDecodedBBoxes);
+ }
+
+ size_t numKept = 0; // running sum of the values returned by processDetections_
+ std::vector<std::map<int, std::vector<int> > > allIndices;
+ for (int i = 0; i < num; ++i)
+ {
+ numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
+ }
+
+ if (numKept == 0)
+ {
+ // Set confidences to zeros.
+ Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)};
+ outputs[0](ranges).setTo(0); // only index 2 of the last dimension is cleared
+ return true;
+ }
+ int outputShape[] = {1, 1, (int)numKept, 7}; // 7 floats are written per kept detection
+ Mat mat(4, outputShape, CV_32F);
+ float* outputsData = mat.ptr<float>();
+
+ size_t count = 0;
+ for (int i = 0; i < num; ++i)
+ {
+ count += outputDetections_(i, &outputsData[count * 7],
+ allDecodedBBoxes[i], allConfidenceScores[i],
+ allIndices[i]);
+ }
+ UMat& output = outputs_arr.getUMatRef(0);
+ output = mat.getUMat(ACCESS_READ); // output 0 is replaced by a UMat obtained from the freshly filled Mat
+ CV_Assert(count == numKept);
+ return true;
+ }
+#endif
+
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point: OpenCL attempt first, Mat fallback second
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+ CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && // OpenCL is attempted only when the OpenCL target is selected
+ OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // presumably reached only when CV_OCL_RUN did not return early - confirm against the macro definition
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
return true;
}
- void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point: the functor's OpenCL hook first, Mat fallback second
{
CV_TRACE_FUNCTION();
CV_OCL_RUN((this->preferableTarget == DNN_TARGET_OPENCL) && // OpenCL is attempted only when the OpenCL target is selected
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
- func.applyOCL(inputs, outputs, internals))
+ func.applyOCL(inputs_arr, outputs_arr, internals_arr))
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // presumably reached only when CV_OCL_RUN did not return early - confirm against the macro definition
+ }
+
+ void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ {
+ CV_TRACE_FUNCTION();
for (size_t i = 0; i < inputs.size(); i++)
{
return true;
}
- bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) // runs this functor's OpenCL kernel over every input/output pair
{
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
+
+ inps.getUMatVector(inputs);
+ outs.getUMatVector(outputs);
for (size_t i = 0; i < inputs.size(); i++)
{
- UMat src, dst;
- inputs[i]->copyTo(src);
- dst = outputs[i].getUMat(ACCESS_WRITE);
+ UMat& src = inputs[i];
+ UMat& dst = outputs[i];
CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset); // the kernel receives raw pointers, so both buffers must be continuous and start at offset 0
- ocl::Kernel ker;
- CV_Assert(initKernel(ker, src));
- ker.set(0, (int)src.total());
- ker.set(1, ocl::KernelArg::PtrReadOnly(src));
- ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+ ocl::Kernel kernel;
+ CV_Assert(initKernel(kernel, src));
+ kernel.set(0, (int)src.total()); // arg 0: element count, arg 1: source, arg 2: destination
+ kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+ kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
size_t gSize = src.total(); // one-dimensional launch sized by the element count
- CV_Assert(ker.run(1, &gSize, &wgSize, false));
+ CV_Assert(kernel.run(1, &gSize, &wgSize, false));
}
return true;
}
#ifdef HAVE_OPENCL
- bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) // OpenCL hook of this functor; placeholder only
{
// TODO: implement OCL version
return false; // nothing was computed; no argument is read or written
}
#ifdef HAVE_OPENCL
- bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) // OpenCL hook of this functor; placeholder only
{
// TODO: implement OCL version
return false; // nothing was computed; no argument is read or written
}
#ifdef HAVE_OPENCL
- bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) // OpenCL hook of this functor; placeholder only
{
// TODO: implement OCL version
return false; // nothing was computed; no argument is read or written
}
#ifdef HAVE_OPENCL
- bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) // OpenCL hook of this functor; placeholder only
{
// TODO: implement OCL version
return false; // nothing was computed; no argument is read or written
}
#ifdef HAVE_OPENCL
- bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) // OpenCL hook of this functor; placeholder only
{
// TODO: implement OCL version
return false; // nothing was computed; no argument is read or written
}
#ifdef HAVE_OPENCL
- bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) // OpenCL hook of this functor; placeholder only
{
// TODO: implement OCL version
return false; // nothing was computed; no argument is read or written
}
#ifdef HAVE_OPENCL
- bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) // OpenCL hook of this functor; placeholder only
{
// TODO: implement OCL version
return false; // nothing was computed; no argument is read or written
}
#ifdef HAVE_OPENCL
- bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) // OpenCL hook of this functor; placeholder only
{
// TODO: implement OCL version
return false; // nothing was computed; no argument is read or written
}
};
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point; this layer has no OpenCL branch here
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // always handled by the Mat-based forward() through the fallback
+ }
+
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
return true;
}
+#ifdef HAVE_OPENCL
+ bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // OpenCL path: rebinds each output to a reshaped view of the matching input
+ {
+ std::vector<UMat> inpvec;
+ std::vector<UMat> outputs;
+
+ inputs_arr.getUMatVector(inpvec);
+ outputs_arr.getUMatVector(outputs);
+
+ std::vector<UMat*> inputs(inpvec.size()); // pointer view over inpvec, used by the loop below
+ for (int i = 0; i < inpvec.size(); i++)
+ inputs[i] = &inpvec[i];
+
+ for (size_t i = 0; i < inputs.size(); i++)
+ {
+ MatShape outShape = shape(outputs[i]); // the target shape is taken from the already allocated output
+ UMat& output = outputs_arr.getUMatRef(i); // reference into the caller's container, so the assignment below is visible to the caller
+ output = inputs[i]->reshape(1, (int)outShape.size(), &outShape[0]);
+ }
+
+ return true;
+ }
+#endif
+
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) // array-based entry point: OpenCL attempt first, Mat fallback second
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+ CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && // OpenCL is attempted only when the OpenCL target is selected
+ outputs_arr.isUMatVector() && // forward_ocl calls getUMatRef() on the outputs, so they must be a UMat vector
+ OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); // presumably reached only when CV_OCL_RUN did not return early - confirm against the macro definition
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
};
#ifdef HAVE_OPENCL
- bool forward_ocl(std::vector<Mat*> &input, std::vector<Mat> &output)
+ bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
{
- int axisCan = clamp(axis, input[0]->dims);
- int numOutput = blobs[0].size[0];
- int innerSize = blobs[0].size[1];
- int outerSize = input[0]->total(0, axisCan);
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
+
+ inps.getUMatVector(inputs);
+ outs.getUMatVector(outputs);
+
+ int axisCan = clamp(axis, inputs[0].dims);
+ int numOutput = umat_blobs[0].size[0];
+ int innerSize = umat_blobs[0].size[1];
+ int outerSize = total(shape(inputs[0]), 0, axisCan);
bool ret = true;
if (innerProductOp.empty())
}
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
- for (size_t i = 0; i < input.size(); i++)
+ for (size_t i = 0; i < inputs.size(); i++)
{
- UMat srcMat, dstMat;
- srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ);
- dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE);
+ UMat& srcMat = inputs[i];
+ UMat& dstMat = outputs[i];
dstMat.setTo(0.0f);
if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat))
if (ret) return true;
UMat& weights = umat_blobs[0];
- for (size_t i = 0; i < input.size(); i++)
+ for (size_t i = 0; i < inputs.size(); i++)
{
+ MatShape inshape, outshape;
+ inshape = shape(outerSize, innerSize);
+ outshape = shape(outerSize, numOutput);
+
UMat srcMat, dstMat;
- srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ);
- dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE);
+ srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
+ dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);
}
#endif
- void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &)
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(): OpenCL attempt via CV_OCL_RUN, then the base-class fallback
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
- forward_ocl(input, output))
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))  // all three arrays are forwarded; NOTE(review): CV_OCL_RUN is defined elsewhere and presumably returns from forward() when forward_ocl() yields true - confirm
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // reached whenever the CV_OCL_RUN statement above did not end the call
+ }
+
+ void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &)
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
int axisCan = clamp(axis, input[0]->dims);
int outerSize = input[0]->total(0, axisCan);
}
#ifdef HAVE_OPENCL
- bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)  // OpenCL path: returns true only when lrnOp->Forward() succeeds; `internals` is not referenced in the lines visible here
{
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
+
+ inps.getUMatVector(inputs);  // pull the generic arrays into local std::vector<UMat> containers
+ outs.getUMatVector(outputs);
+
if (lrnOp.empty())
{
OCL4DNNLRNConfig config;
config.alpha = alpha;
config.beta = beta;
config.k = bias;
- CHECK_EQ(4, inputs[0]->dims) << "Input must have 4 axes, "
+ CHECK_EQ(4, inputs[0].dims) << "Input must have 4 axes, "
<< "corresponding to (num, channels, height, width)";
- config.batch_size = inputs[0]->size[0];
- config.channels = inputs[0]->size[1];
- config.height = inputs[0]->size[2];
- config.width = inputs[0]->size[3];
+ config.batch_size = inputs[0].size[0];  // config dimensions are read from the first input only, in the (num, channels, height, width) order named by the check above
+ config.channels = inputs[0].size[1];
+ config.height = inputs[0].size[2];
+ config.width = inputs[0].size[3];
config.norm_by_size = normBySize;
lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));
}
- UMat inpMat, outMat;
- inpMat = inputs[0]->getUMat(ACCESS_READ);
- outMat = outputs[0].getUMat(ACCESS_WRITE);
-
- if (!lrnOp->Forward(inpMat, outMat))
+ if (!lrnOp->Forward(inputs[0], outputs[0]))  // only element 0 of each vector is handed to the op; NOTE(review): any further inputs are not visited here - confirm that is intended
return false;
return true;
}
#endif
- void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(): size check, OpenCL attempt via CV_OCL_RUN, then the base-class fallback
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
- CV_Assert(inputs.size() == outputs.size());
+ CV_Assert(inputs_arr.total() == outputs_arr.total());  // input and output arguments must report the same total(); assumes total() is the number of contained arrays - TODO confirm
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
- forward_ocl(inputs, outputs, internals))
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))  // all three arrays are forwarded; NOTE(review): CV_OCL_RUN is defined elsewhere and presumably returns from forward() when forward_ocl() yields true - confirm
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // reached whenever the CV_OCL_RUN statement above did not end the call
+ }
+
+ void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+ CV_Assert(inputs.size() == outputs.size());
for (int i = 0; i < inputs.size(); i++)
{
return false;
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
eps = params.get<double>("eps", 1e-9);
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
return true;
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4;
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
}
};
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
}
#ifdef HAVE_OPENCL
- bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
{
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
+
+ inps.getUMatVector(inputs);
+ outs.getUMatVector(outputs);
+
if (poolOp.empty())
{
OCL4DNNPoolConfig config;
- config.in_shape = shape(*inputs[0]);
+ config.in_shape = shape(inputs[0]);
config.out_shape = shape(outputs[0]);
config.kernel = kernel;
config.pad = pad;
config.stride = stride;
- config.channels = inputs[0]->size[1];
+ config.channels = inputs[0].size[1];
config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
(type == AVE ? LIBDNN_POOLING_METHOD_AVE :
LIBDNN_POOLING_METHOD_STO);
for (size_t ii = 0; ii < inputs.size(); ii++)
{
- UMat inpMat, outMat, maskMat;
-
- inpMat = inputs[ii]->getUMat(ACCESS_READ);
-
- if (type == MAX)
- {
- outMat = outputs[2 * ii].getUMat(ACCESS_WRITE);
- maskMat = outputs[2 * ii + 1].getUMat(ACCESS_WRITE);
- } else {
- outMat = outputs[ii].getUMat(ACCESS_WRITE);
- maskMat = UMat();
- }
+ UMat& inpMat = inputs[ii];
+ int out_index = (type == MAX) ? 2 : 1;
+ UMat& outMat = outputs[out_index * ii];
+ UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();
CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
}
#endif
- void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(): OpenCL attempt via CV_OCL_RUN, then the base-class fallback
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
- forward_ocl(inputs, outputs, internals))
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))  // all three arrays are forwarded; NOTE(review): CV_OCL_RUN is defined elsewhere and presumably returns from forward() when forward_ocl() yields true - confirm
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // reached whenever the CV_OCL_RUN statement above did not end the call
+ }
+
+ void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
for (size_t ii = 0; ii < inputs.size(); ii++)
{
return false;
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
allocated = true;
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
}
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
}
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
{
return backendId == DNN_BACKEND_DEFAULT;
}
+
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
return true;
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
backendId == DNN_BACKEND_HALIDE && haveHalide();
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
return true;
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
}
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
}
#ifdef HAVE_OPENCL
- bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays itns)
{
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
+ std::vector<UMat> internals;
+
+ inps.getUMatVector(inputs);
+ outs.getUMatVector(outputs);
+ itns.getUMatVector(internals);
+
if (softmaxOp.empty())
{
OCL4DNNSoftmaxConfig config;
- config.in_shape = shape(*inputs[0]);
+ config.in_shape = shape(inputs[0]);
config.axis = axisRaw;
- config.channels = inputs[0]->size[axisRaw];
+ config.channels = inputs[0].size[axisRaw];
config.logsoftmax = logSoftMax;
softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
}
- UMat srcMat, dstMat;
- srcMat = inputs[0]->getUMat(ACCESS_READ);
- dstMat = outputs[0].getUMat(ACCESS_WRITE);
+ UMat& src = inputs[0];
+ UMat& dstMat = outputs[0];
- if (softmaxOp->Forward(srcMat, dstMat))
+ if (softmaxOp->Forward(src, dstMat))
return true;
- const Mat &src = *inputs[0];
- UMat bufMat = internals[0].getUMat(ACCESS_WRITE);
- srcMat.copyTo(dstMat);
+ UMat& bufMat = internals[0];
+ src.copyTo(dstMat);
int axis = clamp(axisRaw, src.dims);
- size_t outerSize = src.total(0, axis);
+ MatShape s = shape(src);
+ size_t outerSize = total(s, 0, axis);
size_t channels = src.size[axis];
- size_t innerSize = src.total(axis + 1);
+ size_t innerSize = total(s, axis + 1);
String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
ocl::Kernel kmax, ksub, ksum, kdiv;
}
#endif
- void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(): OpenCL attempt via CV_OCL_RUN, then the base-class fallback
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
- forward_ocl(inputs, outputs, internals))
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))  // all three arrays are forwarded; NOTE(review): CV_OCL_RUN is defined elsewhere and presumably returns from forward() when forward_ocl() yields true - confirm
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // reached whenever the CV_OCL_RUN statement above did not end the call
+ }
+
+ void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());
const Mat &src = *inputs[0];
Mat &dst = outputs[0];
return false;
}
+ void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)  // generic-array overload of forward(); contains no computation of its own
+ {
+ CV_TRACE_FUNCTION();
+ CV_TRACE_ARG_VALUE(name, "name", name.c_str());  // hands `name` to the trace macro under the key "name"
+
+ Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);  // unconditional hand-off of all three arrays; NOTE(review): presumably ends up in the std::vector<Mat*> overload - confirm in Layer::forward_fallback
+ }
+
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();