class CV_EXPORTS ReLU6Layer : public ActivationLayer
{
public:
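+ //! clamp bounds read from LayerParams ("min_value"/"max_value"); f(x) = min(max(x, minValue), maxValue)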
+ float minValue, maxValue;
+
static Ptr<ReLU6Layer> create(const LayerParams &params);
};
nextData &&
((nextData->type == "ReLU") ||
(nextData->type == "ChannelsPReLU") ||
+ (nextData->type == "ReLU6") ||
(nextData->type == "TanH") ||
(nextData->type == "Power"))) )
{
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
}
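+ // ReLU6 reuses the reluslope vector to carry its two clamp bounds down to the fused OCL4DNN kernel.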
+ Ptr<ReLU6Layer> activ_relu6 = activ.dynamicCast<ReLU6Layer>();
+ if( !activ_relu6.empty() )
+ {
+ reluslope.resize(2);
+ reluslope[0] = activ_relu6->minValue;
+ reluslope[1] = activ_relu6->maxValue;
+ activType = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+ }
+
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
if( !activ_chprelu.empty() )
{
{
convolutionOp->setActivTanh(true);
}
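+ // reluslope[0] / reluslope[1] hold the [min, max] bounds stored during fusion setup above.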
+ else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU6)
+ {
+ convolutionOp->setActivReLU6(true, reluslope[0], reluslope[1]);
+ }
else
{
convolutionOp->setActivReLU(false, 0);
convolutionOp->setActivPReLU(false, reluslope);
convolutionOp->setActivPower(false, 1.f);
convolutionOp->setActivTanh(false);
+ convolutionOp->setActivReLU6(false, 0, 0);
}
newActiv = false;
}
#ifdef HAVE_OPENCL
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
- // TODO: implement OCL version
- return false;
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
+
+ inps.getUMatVector(inputs);
+ outs.getUMatVector(outputs);
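+ // oclGetTMacro() supplies the -DT=... build option so the kernel's T matches the input depth.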
+ String buildopt = oclGetTMacro(inputs[0]);
+
+ for (size_t i = 0; i < inputs.size(); i++)
+ {
+ UMat& src = inputs[i];
+ UMat& dst = outputs[i];
+
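+ // One work-item per element; note minValue/maxValue are passed as float, so this path assumes T is float.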
+ ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
+ kernel.set(0, (int)src.total());
+ kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+ kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+ kernel.set(3, (float)minValue);
+ kernel.set(4, (float)maxValue);
+
+ size_t gSize = src.total();
+ CV_Assert(kernel.run(1, &gSize, NULL, false));
+ }
+
+ return true;
}
#endif
float minValue = params.get<float>("min_value", 0.0f);
float maxValue = params.get<float>("max_value", 6.0f);
Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
l->setParamsFrom(params);
+ l->minValue = minValue;
+ l->maxValue = maxValue;
+
return l;
}
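+// A minimal usage sketch (illustrative, not part of this patch; "lp" is a
+// hypothetical name): the layer picks its bounds up from LayerParams, with
+// the defaults shown in create() above.
+//   LayerParams lp;
+//   lp.set("min_value", 0.0f);
+//   lp.set("max_value", 6.0f);
+//   Ptr<ReLU6Layer> relu6 = ReLU6Layer::create(lp);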
OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
- OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
+ OCL4DNN_CONV_FUSED_ACTIV_TANH = 4,
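+ // clamp-based activation: y = clamp(x, min_value_, max_value_)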
+ OCL4DNN_CONV_FUSED_ACTIV_RELU6 = 5
} ocl4dnnFusedActiv_t;
template<typename Dtype>
void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
void setActivPower(bool fuse_activ, float power);
void setActivTanh(bool fuse_activ);
+ void setActivReLU6(bool fuse_activ, float min, float max);
void setBias(bool bias_term);
private:
cv::ocl::ProgramSource src_;
int32_t prev_kernel_type_;
float negative_slope_;
+ float min_value_;
+ float max_value_;
UMat negative_slope_umat_;
ocl4dnnFusedActiv_t fused_activ_;
float power_;
fused_eltwise_ = false;
power_ = 1.f;
negative_slope_ = 0;
+ min_value_ = 0;
+ max_value_ = 0;
prev_kernel_type_ = -1;
tuned_ = false;
case OCL4DNN_CONV_FUSED_ACTIV_TANH:
addDef("FUSED_CONV_TANH", 1);
break;
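+ // Build the convolution kernels with the clamp activation inlined.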
+ case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+ addDef("FUSED_CONV_RELU6", 1);
+ break;
default:
;
}
case OCL4DNN_CONV_FUSED_ACTIV_POWER:
kernel.set(argIdx++, (float)power_);
break;
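+ // Argument order must mirror the FUSED_ARG macro: min_value, then max_value.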
+ case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+ kernel.set(argIdx++, (float)min_value_);
+ kernel.set(argIdx++, (float)max_value_);
+ break;
default:
;
}
}
template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivReLU6(bool fuse_activ, float min, float max)
+{
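+ // fuse_activ == false resets to no fused activation, matching the other setActiv* setters.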
+ if ( fuse_activ )
+ {
+ fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+ min_value_ = min;
+ max_value_ = max;
+ }
+ else
+ fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
+template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
{
if ( fuse_activ )
#endif
}
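+// Standalone (unfused) ReLU6 kernel: out[i] = clamp(in[i], minValue, maxValue).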
+__kernel void ReLU6Forward(const int count, __global const T* in, __global T* out,
+ const T minValue, const T maxValue)
+{
+ int index = get_global_id(0);
+ if(index < count)
+ {
+ T x = in[index];
+ out[index] = clamp(x, minValue, maxValue);
+ }
+}
+
__kernel void PReLUForward(const int count, const int channels, const int plane_size,
__global const T* in, __global T* out, __global const T* slope_data)
{
#if defined(FUSED_CONV_RELU)
#define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope)))
-#define NEGATIVE_SLOPE_ARG Dtype negative_slope,
+#define FUSED_ARG Dtype negative_slope,
#elif defined(FUSED_CONV_PRELU)
#define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
-#define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
+#define FUSED_ARG __global const Dtype *negative_slope,
#elif defined(FUSED_CONV_POWER)
#define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
-#define NEGATIVE_SLOPE_ARG Dtype power,
+#define FUSED_ARG Dtype power,
#elif defined(FUSED_CONV_TANH)
#define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
+#elif defined(FUSED_CONV_RELU6)
+#define ACTIVATION_RELU_FUNCTION(x, c) (clamp((Dtype)(x), min_value, max_value))
+#define FUSED_ARG Dtype min_value, Dtype max_value,
#else
#define ACTIVATION_RELU_FUNCTION(x, c) (x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
#endif
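+// FUSED_ARG generalizes the former NEGATIVE_SLOPE_ARG: each fused activation now declares its own extra kernel arguments (slope, power, or clamp bounds).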
#ifdef FUSED_CONV_ELTWISE
__kernel void ConvolveBasic(
ELTWISE_DATA_ARG
- NEGATIVE_SLOPE_ARG
+ FUSED_ARG
__global Dtype* image_data,
int image_offset,
__global Dtype* kernel_data,
__kernel void
convolve_simd(
ELTWISE_DATA_ARG
- NEGATIVE_SLOPE_ARG
+ FUSED_ARG
__global Dtype* inputs_base,
filter_qualifier Dtype* weights_base,
BIAS_KERNEL_ARG
#define GEMM_LIKE_KERNEL_ARGS \
ELTWISE_DATA_ARG \
- NEGATIVE_SLOPE_ARG \
+ FUSED_ARG \
const __global Dtype *src0, \
const __global Dtype *src1, \
BIAS_KERNEL_ARG \
__kernel void DWCONV(
ELTWISE_DATA_ARG
- NEGATIVE_SLOPE_ARG
+ FUSED_ARG
__global Dtype* image_data,
__global Dtype* kernel_data,
BIAS_KERNEL_ARG