modules/dnn/src/layers/softmax_layer.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "../precomp.hpp"
  44 #include "layers_common.hpp"
  45 #include "op_halide.hpp"
  46 #include "opencl_kernels_dnn.hpp"
  47 #include <algorithm>
  48 #include <stdlib.h>
  49 using std::max;
  50 #ifdef HAVE_OPENCL
  51 using namespace cv::dnn::ocl4dnn;
  52 #endif
  53
  54 namespace cv
  55 {
  56 namespace dnn
  57 {
  58
  59 class SoftMaxLayerImpl : public SoftmaxLayer
  60 {
  61 public:
  62
  63     SoftMaxLayerImpl(const LayerParams& params)
  64     {
  65         axisRaw = params.get<int>("axis", 1);
  66         logSoftMax = params.get<int>("log_softmax", false);
  67         setParamsFrom(params);
  68     }
  69
  70 #ifdef HAVE_OPENCL
  71     Ptr<OCL4DNNSoftmax<float> > softmaxOp;
  72 #endif
  73
  74     bool getMemoryShapes(const std::vector<MatShape> &inputs,
  75                          const int requiredOutputs,
  76                          std::vector<MatShape> &outputs,
  77                          std::vector<MatShape> &internals) const
  78     {
  79         bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
  80         MatShape shape = inputs[0];
  81         int cAxis = clamp(axisRaw, shape.size());
  82         shape[cAxis] = 1;
  83         internals.assign(1, shape);
  84         return inplace;
  85     }
  86
  87     virtual bool supportBackend(int backendId)
  88     {
  89         return backendId == DNN_BACKEND_DEFAULT ||
  90                backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1;
  91     }
  92
  93 #ifdef HAVE_OPENCL
  94     bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
  95     {
  96         if (softmaxOp.empty())
  97         {
  98             OCL4DNNSoftmaxConfig config;
  99
 100             config.in_shape = shape(*inputs[0]);
 101             config.axis = axisRaw;
 102             config.channels = inputs[0]->size[axisRaw];
 103             config.logsoftmax = logSoftMax;
 104
 105             softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
 106         }
 107
 108         UMat srcMat, dstMat;
 109         srcMat = inputs[0]->getUMat(ACCESS_READ);
 110         dstMat = outputs[0].getUMat(ACCESS_WRITE);
 111
 112         if (softmaxOp->Forward(srcMat, dstMat))
 113             return true;
 114
 115         const Mat &src = *inputs[0];
 116         UMat bufMat = internals[0].getUMat(ACCESS_WRITE);
 117         srcMat.copyTo(dstMat);
 118
 119         int axis = clamp(axisRaw, src.dims);
 120         size_t outerSize = src.total(0, axis);
 121         size_t channels = src.size[axis];
 122         size_t innerSize = src.total(axis + 1);
 123
 124         String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
 125         ocl::Kernel kmax, ksub, ksum, kdiv;
 126
 127         if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
 128             return false;
 129
 130         if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
 131             return false;
 132
 133         if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
 134             return false;
 135
 136         if (logSoftMax) buildOpts += " -DLOG_SOFTMAX ";
 137         if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
 138             return false;
 139
 140         size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
 141         size_t bufSize = internals[0].total();
 142         size_t totalSize = src.total();
 143
 144         kmax.args((int)outerSize, (int)channels, (int)innerSize,
 145                   ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
 146         if (!kmax.run(1, &bufSize, &wgSize, false))
 147             return false;
 148
 149         ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
 150                   ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
 151         if (!ksub.run(1, &totalSize, &wgSize, false))
 152             return false;
 153
 154         cv::exp(dstMat, dstMat);
 155
 156         ksum.args((int)outerSize, (int)channels, (int)innerSize,
 157                   ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
 158         if (!ksum.run(1, &bufSize, &wgSize, false))
 159             return false;
 160
 161         kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
 162                   ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
 163         if (!kdiv.run(1, &totalSize, &wgSize, false))
 164             return false;
 165
 166         return true;
 167     }
 168 #endif
 169
 170     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
 171     {
 172         CV_TRACE_FUNCTION();
 173         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 174
 175         CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
 176                    OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
 177                    forward_ocl(inputs, outputs, internals))
 178
 179         const Mat &src = *inputs[0];
 180         Mat &dst = outputs[0];
 181
 182         int axis = clamp(axisRaw, src.dims);
 183         size_t outerSize = src.total(0, axis), channels = src.size[axis],
 184                 innerSize = src.total(axis + 1);
 185
 186         CV_Assert(src.type() == CV_32F);
 187         CV_Assert(src.isContinuous() && dst.isContinuous());
 188
 189         const float *srcPtr = src.ptr<float>();
 190         float *dstPtr = dst.ptr<float>();
 191         float *bufPtr = internals[0].ptr<float>();
 192
 193         size_t outerStep = src.total(axis);
 194         size_t cnStep = src.total(axis + 1);
 195
 196         //compute max along axis
 197         for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
 198         {
 199             size_t srcOffset = outerDim * outerStep;
 200             size_t bufOffset = outerDim * cnStep;
 201
 202             memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
 203
 204             for (size_t cnDim = 1; cnDim < channels; cnDim++)
 205             {
 206                 for (size_t i = 0; i < innerSize; i++)
 207                     bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
 208             }
 209         }
 210
 211         //subtract max
 212         for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
 213         {
 214             size_t srcOffset = outerDim * outerStep;
 215             size_t bufOffset = outerDim * cnStep;
 216
 217             for (size_t cnDim = 0; cnDim < channels; cnDim++)
 218             {
 219                 const int offset = srcOffset + cnDim * cnStep;
 220                 for (size_t i = 0; i < innerSize; i++)
 221                     dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
 222             }
 223         }
 224
 225         cv::exp(dst, dst);
 226
 227         for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
 228         {
 229             size_t srcOffset = outerDim * outerStep;
 230             size_t bufOffset = outerDim * cnStep;
 231
 232             //sum exp along axis
 233             for (size_t i = 0; i < innerSize; i++)
 234                 bufPtr[bufOffset + i] = 0.f;
 235
 236             for (size_t cnDim = 0; cnDim < channels; cnDim++)
 237             {
 238                 const int offset = srcOffset + cnDim * cnStep;
 239                 for (size_t i = 0; i < innerSize; i++)
 240                     bufPtr[bufOffset + i] += dstPtr[offset + i];
 241             }
 242
 243             //divide by computed sum
 244             for (size_t cnDim = 0; cnDim < channels; cnDim++)
 245             {
 246                 const int offset = srcOffset + cnDim * cnStep;
 247                 for (size_t i = 0; i < innerSize; i++)
 248                     dstPtr[offset + i] /= bufPtr[bufOffset + i];
 249             }
 250             if (logSoftMax)
 251             {
 252                 for (size_t cnDim = 0; cnDim < channels; cnDim++)
 253                 {
 254                     const int offset = srcOffset + cnDim * cnStep;
 255                     for (size_t i = 0; i < innerSize; i++)
 256                         dstPtr[offset + i] = log(dstPtr[offset + i]);
 257                 }
 258             }
 259         }
 260     }
 261
 262     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
 263     {
 264 #ifdef HAVE_HALIDE
 265         Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
 266         int inW, inH, inC, inN;
 267         getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
 268
 269         if (inW != 1 || inH != 1)
 270             CV_Error(cv::Error::StsNotImplemented,
 271                      "Halide backend for SoftMax with spatial size "
 272                      "more than 1x1 is not implemented");
 273
 274         Halide::Var x("x"), y("y"), c("c"), n("n");
 275         Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
 276
 277         Halide::Func expInput("expInput");
 278         Halide::RDom r(0, inW, 0, inH, 0, inC);
 279         expInput(x, y, c, n) = exp(inputBuffer(x, y, c, n));
 280         Halide::Expr globalSum = sum(expInput(r.x, r.y, r.z, n));
 281         top(x, y, c, n) = expInput(x, y, c, n) / globalSum;
 282         return Ptr<BackendNode>(new HalideBackendNode(top));
 283 #endif  // HAVE_HALIDE
 284         return Ptr<BackendNode>();
 285     }
 286
 287     int64 getFLOPS(const std::vector<MatShape> &inputs,
 288                   const std::vector<MatShape> &outputs) const
 289     {
 290         (void)outputs; // suppress unused variable warning
 291         int64 flops = 0;
 292
 293         for (int i = 0; i < inputs.size(); i++)
 294         {
 295             flops += 4*total(inputs[i]);
 296         }
 297
 298         return flops;
 299     }
 300
 301     int axisRaw;
 302 };
 303
 304 Ptr<SoftmaxLayer> SoftmaxLayer::create(const LayerParams& params)
 305 {
 306     return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(params));
 307 }
 308
 309 }
 310 }