bool fusedActivation = false;
bool fusedAdd = false;
bool isConv2D = false; // Should be deleted after the fastconv branch supports Conv1D and Conv3D.
+ bool useWinograd = true; // Flag whether to use Winograd to speed up 3x3 convolution.
};
class CV_EXPORTS ConvolutionLayerInt8 : public BaseConvolutionLayer
// Quantization type flag. If per_channel is true (the default), this layer contains
// per-channel quantization parameters; otherwise it contains per-tensor quantized parameters.
bool per_channel;
+ bool useWinograd = true; // Flag whether to use Winograd to speed up 3x3 convolution.
static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
};
*/
CV_WRAP void enableFusion(bool fusion);
+ /** @brief Enables or disables the Winograd compute branch. The Winograd compute branch can speed up
+ * 3x3 convolution at a small loss of accuracy.
+ * @param useWinograd true to enable the Winograd compute branch. The default is true.
+ */
+ CV_WRAP void enableWinograd(bool useWinograd);
+
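For reference, a minimal usage sketch of the new API (the model path and input blob are placeholders; Winograd is on by default, so the call is only needed to opt out, e.g. when bit-stable outputs matter more than speed):

```cpp
#include <opencv2/dnn.hpp>

cv::dnn::Net net = cv::dnn::readNet("model.onnx");  // placeholder model
cv::Mat blob;                                       // prepared with cv::dnn::blobFromImage
net.enableWinograd(false);  // opt out of the Winograd branch (enabled by default)
net.setInput(blob);
cv::Mat out = net.forward();
```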
/** @brief Returns overall time for inference and timings (in ticks) for layers.
*
* Indexes in returned vector correspond to layers ids. Some layers can be fused with others,
BaseConvolutionLayerInt8Impl(const LayerParams &params)
{
setParamsFrom(params);
- getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
+ getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads, useWinograd);
numOutput = params.get<int>("num_output");
int ngroups = params.get<int>("group", 1);
{
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
- cv::String &padMode, std::vector<size_t>& adjust_pads);
+ cv::String &padMode, std::vector<size_t>& adjust_pads, bool& useWinograd);
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
BaseConvolutionLayerImpl(const LayerParams &params)
{
setParamsFrom(params);
- getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
+ getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations,
+ padMode, adjust_pads, useWinograd);
numOutput = params.get<int>("num_output");
int ngroups = params.get<int>("group", 1);
int dilation_w = dilations.back();
fastConv2dImpl = initFastConv2d(ngroups, K, C, Hk, Wk, stride_w, stride_h, dilation_w,
- dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0]);
+ dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0], useWinograd);
}
if (fastConv2dImpl)
const std::vector<size_t>& pads_begin,
const std::vector<size_t>& pads_end,
InputArray _weightsMat,
- float* srcBias)
+ float* srcBias,
+ bool useWinograd)
{
Ptr<FastConv2d> conv = makePtr<FastConv2d>();
const size_t wstep = weightsMat.step1();
#if CV_NEON // For now, the Winograd branch is ARM (NEON) only.
- if (ngroups == 1 && Hk ==3 && Wk == 3 && stride_x == 1 && stride_y == 1 &&
+ if (useWinograd && ngroups == 1 && Hk == 3 && Wk == 3 && stride_x == 1 && stride_y == 1 &&
dilation_x == 1 && dilation_y == 1 && K >= 16 && C >= 16)
- conv->ifWinograd63 = true;
+ conv->useWinograd63 = true;
#else
- conv->ifWinograd63 = false;
+ conv->useWinograd63 = false;
#endif
float *srcWeights = (float *)weightsMat.data;
}});
// Prepare the weights for Winograd F(6x6, 3x3).
- if (conv->ifWinograd63)
+ if (conv->useWinograd63)
{
initWinograd63(conv, weightsMat, K, C);
}
}
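As background for this hunk (a sketch of the standard Lavin-Gray formulation, not a restatement of this patch's kernels): Winograd F(6x6, 3x3) computes each 6x6 output tile Y from an 8x8 input tile d and the 3x3 kernel g as

Y = A^T [ (G g G^T) ⊙ (B^T d B) ] A

where G (8x3), B (8x8), and A (8x6) are fixed transform matrices and ⊙ is element-wise multiplication. The kernel transform G g G^T does not depend on the input, which is why initWinograd63 precomputes it once from weightsMat at init time.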
#if CV_NEON
- if (conv->ifWinograd63
- && inputShape[2] > 12 && inputShape[3] > 12
- && inputShape[2] < 120 && inputShape[3] < 120
- )
+ if (conv->useWinograd63 && inputShape[2] > 12 && inputShape[3] > 12)
{
if (runWinograd63(input, fusedAddMat, output, conv, ntasks, minval, maxval, activ, ifMinMaxAct))
return;
std::vector<float> weightsBuf; // For generic Conv 2D
std::vector<float> weightsWino63Buf; // For Winograd F(6x6, 3x3).
std::vector<float> biasBuf;
- bool ifWinograd63 = false;
+ bool useWinograd63 = false;
bool useAVX2 = checkHardwareSupport(CPU_AVX2);
bool useNEON = checkHardwareSupport(CPU_NEON);
};
const std::vector<size_t>& pads_begin,
const std::vector<size_t>& pads_end,
InputArray weightsMat,
- float* srcBias);
+ float* srcBias, bool useWinograd);
// It dispatches to different computing branches, such as Winograd and 1x1 convolution.
void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks,
void initWinograd63(Ptr<FastConv2d>& conv, InputArray _weightsMat, int K, int C)
{
- conv->ifWinograd63 = false;
+ conv->useWinograd63 = false;
}
int runWinograd63(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct)
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides,
- std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads)
+ std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads,
+ bool& useWinograd)
{
util::getKernelSize(params, kernel);
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));
util::getParameter(params, "adj", "adj", adjust_pads, true, std::vector<size_t>(kernel.size(), 0));
+ useWinograd = params.get<bool>("use_winograd", true);
for (int i = 0; i < dilations.size(); i++)
CV_Assert(dilations[i] > 0);
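A sketch of per-layer control through the `use_winograd` key parsed above (layer name, sizes, and weights are illustrative); this complements the network-wide Net::enableWinograd switch:

```cpp
cv::dnn::LayerParams lp;
lp.name = "conv1";              // illustrative name
lp.type = "Convolution";
lp.set("kernel_size", 3);
lp.set("num_output", 64);
lp.set("use_winograd", false);  // read by getConvolutionKernelParams; defaults to true
lp.blobs.push_back(weights);    // placeholder 64 x C x 3 x 3 kernel Mat
net.addLayer(lp.name, lp.type, lp);
```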
return impl->enableFusion(fusion);
}
+void Net::enableWinograd(bool useWinograd)
+{
+ CV_TRACE_FUNCTION();
+ CV_Assert(impl);
+ return impl->enableWinograd(useWinograd);
+}
+
void Net::setHalideScheduler(const String& scheduler)
{
CV_TRACE_FUNCTION();
preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
preferableTarget = DNN_TARGET_CPU;
hasDynamicShapes = false;
+ useWinograd = true;
}
}
}
+void Net::Impl::enableWinograd(bool useWinograd_)
+{
+    if (useWinograd != useWinograd_)
+    {
+        useWinograd = useWinograd_;
+
+        // Keep the stored layer params and any instantiated layer in sync.
+        for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
+        {
+            int lid = it->first;
+            LayerData &ld = layers[lid];
+            Ptr<Layer>& currLayer = ld.layerInstance;
+
+            if (ld.type == "Convolution")
+            {
+                ld.params.set("use_winograd", useWinograd_);
+                Ptr<ConvolutionLayer> convLayer = currLayer.dynamicCast<ConvolutionLayer>();
+                if (!convLayer.empty())
+                    convLayer->useWinograd = useWinograd_;
+            }
+            else if (ld.type == "ConvolutionInt8")
+            {
+                ld.params.set("use_winograd", useWinograd_);
+                Ptr<ConvolutionLayerInt8> convLayer = currLayer.dynamicCast<ConvolutionLayerInt8>();
+                if (!convLayer.empty())
+                    convLayer->useWinograd = useWinograd_;
+            }
+        }
+    }
+}
+
// TODO drop?
void Net::Impl::getLayerTypes(std::vector<String>& layersTypes) const
bool netWasQuantized;
bool fusion;
bool isAsync; // FIXIT: drop
+ bool useWinograd; // Flag whether the Winograd branch is enabled (see enableWinograd).
std::vector<int64> layersTimings;
void enableFusion(bool fusion_);
virtual void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);
+ void enableWinograd(bool useWinograd_);
void allocateLayers(const std::vector<LayerPin>& blobsToKeep_);
setPreferableBackend(net, DNN_BACKEND_OPENCV);
setPreferableTarget(DNN_TARGET_CPU);
enableFusion(false);
+ enableWinograd(false);
if (calibData.isMat())
{
ASSERT_TRUE(!net.empty());
}
+ net.enableWinograd(false);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
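The tests above disable the branch where bit-stable reference outputs are required; a hedged sketch of the complementary accuracy check one might write (tolerance is a placeholder):

```cpp
net.setInput(blob);
net.enableWinograd(true);
cv::Mat outWinograd = net.forward().clone();
net.enableWinograd(false);
cv::Mat outReference = net.forward().clone();
// Winograd trades a small amount of accuracy for speed, so compare with an epsilon.
double maxDiff = cv::norm(outWinograd, outReference, cv::NORM_INF);
CV_Assert(maxDiff < 1e-3);  // placeholder tolerance
```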