modules/dnn/src/layers/softmax_layer.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "../precomp.hpp"
  44 #include "layers_common.hpp"
  45 #include "../op_halide.hpp"
  46 #include "../op_inf_engine.hpp"
  47 #include "../op_vkcom.hpp"
  48 #include <algorithm>
  49 #include <stdlib.h>
  50 using std::max;
  51
  52 #ifdef HAVE_OPENCL
  53 #include "opencl_kernels_dnn.hpp"
  54 using namespace cv::dnn::ocl4dnn;
  55 #endif
  56
  57 namespace cv
  58 {
  59 namespace dnn
  60 {
  61
  62 class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer
  63 {
  64 public:
  65
  66     SoftMaxLayerImpl(const LayerParams& params)
  67     {
  68         axisRaw = params.get<int>("axis", 1);
  69         logSoftMax = params.get<bool>("log_softmax", false);
  70         setParamsFrom(params);
  71     }
  72
   73 #ifdef HAVE_OPENCL
    // Cached OpenCL softmax operator. forward_ocl() creates it on first use
    // (when it is empty) and finalize() releases it again.
   74     Ptr<OCL4DNNSoftmax<float> > softmaxOp;
   75 #endif
  76
  77     bool getMemoryShapes(const std::vector<MatShape> &inputs,
  78                          const int requiredOutputs,
  79                          std::vector<MatShape> &outputs,
  80                          std::vector<MatShape> &internals) const CV_OVERRIDE
  81     {
  82         bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
  83         MatShape shape = inputs[0];
  84         int cAxis = clamp(axisRaw, shape.size());
  85         shape[cAxis] = 1;
  86         internals.assign(1, shape);
  87         return inplace;
  88     }
  89
  90     virtual bool supportBackend(int backendId) CV_OVERRIDE
  91     {
  92         return backendId == DNN_BACKEND_OPENCV ||
  93                (backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1) ||
  94                (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !logSoftMax) ||
  95                (backendId == DNN_BACKEND_VKCOM && haveVulkan());
  96     }
  97
  98 #ifdef HAVE_OPENCL
  99     virtual void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs) CV_OVERRIDE
 100     {
 101         softmaxOp.release();
 102     }
 103
 104     bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
 105     {
 106         std::vector<UMat> inputs;
 107         std::vector<UMat> outputs;
 108         std::vector<UMat> internals;
 109
 110         bool use_half = (inputs_.depth() == CV_16S);
 111         inputs_.getUMatVector(inputs);
 112         outputs_.getUMatVector(outputs);
 113         internals_.getUMatVector(internals);
 114
 115         UMat& src = inputs[0];
 116         UMat& dstMat = outputs[0];
 117         int axis = clamp(axisRaw, src.dims);
 118
 119         if (softmaxOp.empty())
 120         {
 121             OCL4DNNSoftmaxConfig config;
 122             config.in_shape = shape(inputs[0]);
 123             config.axis = axis;
 124             config.channels = inputs[0].size[axis];
 125             config.logsoftmax = logSoftMax;
 126             config.use_half = use_half;
 127
 128             softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
 129         }
 130
 131         if (softmaxOp->Forward(src, dstMat))
 132             return true;
 133
 134         UMat& bufMat = internals[0];
 135         MatShape s = shape(src);
 136         size_t outerSize = total(s, 0, axis);
 137         size_t channels = src.size[axis];
 138         size_t innerSize = total(s, axis + 1);
 139
 140         String buildOpts = format("-DT=%s", use_half ? "half" : "float");
 141         ocl::Kernel kmax, ksub, ksum, kdiv;
 142
 143         if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
 144             return false;
 145
 146         if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
 147             return false;
 148
 149         if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
 150             return false;
 151
 152         if (logSoftMax) buildOpts += " -DLOG_SOFTMAX ";
 153         if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
 154             return false;
 155
 156         size_t bufSize = internals[0].total();
 157         size_t totalSize = src.total();
 158
 159         size_t internal_globalSize[1] = { bufSize };
 160         size_t total_globalSize[1] = { totalSize };
 161
 162         kmax.args((int)outerSize, (int)channels, (int)innerSize,
 163                   ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadWrite(bufMat));
 164         if (!kmax.run(1, internal_globalSize, NULL, false))
 165             return false;
 166
 167         ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
 168                   ocl::KernelArg::PtrReadOnly(bufMat),
 169                   ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(dstMat));
 170         if (!ksub.run(1, total_globalSize, NULL, false))
 171             return false;
 172
 173         ksum.args((int)outerSize, (int)channels, (int)innerSize,
 174                   ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
 175         if (!ksum.run(1, internal_globalSize, NULL, false))
 176             return false;
 177
 178         kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
 179                   ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
 180         if (!kdiv.run(1, total_globalSize, NULL, false))
 181             return false;
 182
 183         return true;
 184     }
 185 #endif
 186
 187     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
 188     {
 189         CV_TRACE_FUNCTION();
 190         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 191
 192         CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
 193                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 194
 195         if (inputs_arr.depth() == CV_16S)
 196         {
 197             forward_fallback(inputs_arr, outputs_arr, internals_arr);
 198             return;
 199         }
 200
 201         std::vector<Mat> inputs, outputs, internals;
 202         inputs_arr.getMatVector(inputs);
 203         outputs_arr.getMatVector(outputs);
 204         internals_arr.getMatVector(internals);
 205
 206         const Mat &src = inputs[0];
 207         Mat &dst = outputs[0];
 208
 209         int axis = clamp(axisRaw, src.dims);
 210         size_t outerSize = src.total(0, axis), channels = src.size[axis],
 211                 innerSize = src.total(axis + 1);
 212
 213         CV_Assert(src.type() == CV_32F);
 214         CV_Assert(src.isContinuous() && dst.isContinuous());
 215
 216         const float *srcPtr = src.ptr<float>();
 217         float *dstPtr = dst.ptr<float>();
 218         float *bufPtr = internals[0].ptr<float>();
 219
 220         size_t outerStep = src.total(axis);
 221         size_t cnStep = src.total(axis + 1);
 222
 223         //compute max along axis
 224         for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
 225         {
 226             size_t srcOffset = outerDim * outerStep;
 227             size_t bufOffset = outerDim * cnStep;
 228
 229             memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
 230
 231             for (size_t cnDim = 1; cnDim < channels; cnDim++)
 232             {
 233                 for (size_t i = 0; i < innerSize; i++)
 234                     bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
 235             }
 236         }
 237
 238         //subtract max
 239         for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
 240         {
 241             size_t srcOffset = outerDim * outerStep;
 242             size_t bufOffset = outerDim * cnStep;
 243
 244             for (size_t cnDim = 0; cnDim < channels; cnDim++)
 245             {
 246                 const int offset = srcOffset + cnDim * cnStep;
 247                 for (size_t i = 0; i < innerSize; i++)
 248                     dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
 249             }
 250         }
 251
 252         cv::exp(dst, dst);
 253
 254         for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
 255         {
 256             size_t srcOffset = outerDim * outerStep;
 257             size_t bufOffset = outerDim * cnStep;
 258
 259             //sum exp along axis
 260             for (size_t i = 0; i < innerSize; i++)
 261                 bufPtr[bufOffset + i] = 0.f;
 262
 263             for (size_t cnDim = 0; cnDim < channels; cnDim++)
 264             {
 265                 const int offset = srcOffset + cnDim * cnStep;
 266                 for (size_t i = 0; i < innerSize; i++)
 267                     bufPtr[bufOffset + i] += dstPtr[offset + i];
 268             }
 269
 270             //divide by computed sum
 271             for (size_t cnDim = 0; cnDim < channels; cnDim++)
 272             {
 273                 const int offset = srcOffset + cnDim * cnStep;
 274                 for (size_t i = 0; i < innerSize; i++)
 275                     dstPtr[offset + i] /= bufPtr[bufOffset + i];
 276             }
 277             if (logSoftMax)
 278             {
 279                 for (size_t cnDim = 0; cnDim < channels; cnDim++)
 280                 {
 281                     const int offset = srcOffset + cnDim * cnStep;
 282                     for (size_t i = 0; i < innerSize; i++)
 283                         dstPtr[offset + i] = log(dstPtr[offset + i]);
 284                 }
 285             }
 286         }
 287     }
 288
 289     virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
 290     {
 291 #ifdef HAVE_VULKAN
 292         vkcom::Tensor in = VkComTensor(inputs[0]);
 293         int cAxis = clamp(axisRaw, in.dimNum());
 294         std::shared_ptr<vkcom::OpBase> op(new vkcom::OpSoftmax(cAxis, logSoftMax));
 295         return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
 296 #endif  // HAVE_VULKAN
 297         return Ptr<BackendNode>();
 298     }
 299
 300
 301     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
 302     {
 303 #ifdef HAVE_HALIDE
 304         Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
 305         int inW, inH, inC, inN;
 306         getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
 307
 308         if (inW != 1 || inH != 1)
 309             CV_Error(cv::Error::StsNotImplemented,
 310                      "Halide backend for SoftMax with spatial size "
 311                      "more than 1x1 is not implemented");
 312
 313         Halide::Var x("x"), y("y"), c("c"), n("n");
 314         Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
 315
 316         Halide::Func expInput("expInput");
 317         Halide::RDom r(0, inW, 0, inH, 0, inC);
 318         expInput(x, y, c, n) = exp(inputBuffer(x, y, c, n));
 319         Halide::Expr globalSum = sum(expInput(r.x, r.y, r.z, n));
 320         top(x, y, c, n) = expInput(x, y, c, n) / globalSum;
 321         return Ptr<BackendNode>(new HalideBackendNode(top));
 322 #endif  // HAVE_HALIDE
 323         return Ptr<BackendNode>();
 324     }
 325
 326 #ifdef HAVE_INF_ENGINE
 327     virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
 328     {
 329         InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
 330
 331         InferenceEngine::Builder::SoftMaxLayer ieLayer(name);
 332         ieLayer.setAxis(clamp(axisRaw, input->getDims().size()));
 333
 334         return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
 335     }
 336 #endif  // HAVE_INF_ENGINE
 337
 338     int64 getFLOPS(const std::vector<MatShape> &inputs,
 339                   const std::vector<MatShape> &outputs) const CV_OVERRIDE
 340     {
 341         CV_UNUSED(outputs); // suppress unused variable warning
 342         int64 flops = 0;
 343
 344         for (int i = 0; i < inputs.size(); i++)
 345         {
 346             flops += 4*total(inputs[i]);
 347         }
 348
 349         return flops;
 350     }
 351
    // Value of the "axis" layer parameter exactly as read in the constructor.
    // It is normalized with clamp() against the actual tensor rank at each use.
  352     int axisRaw;
 353 };
 354
 355 Ptr<SoftmaxLayer> SoftmaxLayer::create(const LayerParams& params)
 356 {
 357     return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(params));
 358 }
 359
 360 }
 361 }