arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h

   1 /*
   2  * Copyright (c) 2017-2018 ARM Limited.
   3  *
   4  * SPDX-License-Identifier: MIT
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #ifndef __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
  25 #define __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
  26
  27 #include "arm_compute/core/CL/ICLSimple3DKernel.h"
  28
  29 #include <tuple>
  30
  31 namespace arm_compute
  32 {
  33 class ICLTensor;
  34
  35 /** Interface for identifying the maximum value of 1D logits.
      *
      * Derives from @ref ICLSimple3DKernel and declares only configure() and validate();
      * no run() override is declared in this class.
      */
  36 class CLLogits1DMaxKernel : public ICLSimple3DKernel
  37 {
  38 public:
  39     /** Set the input and output tensors.
  40      *
  41      * @param[in]  input  Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32
  42      * @param[out] output Destination tensor. Data types supported: same as @p input
  43      */
  44     void configure(const ICLTensor *input, ICLTensor *output);
  45     /** Static function to check if the given tensor info will lead to a valid configuration of @ref CLLogits1DMaxKernel
  46      *
  47      * @param[in] input  Source tensor info. Data types supported: QS8/QASYMM8/QS16/F16/F32
  48      * @param[in] output Destination tensor info. Data types supported: same as @p input
  49      *
  50      * @return a status
  51      */
  52     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
  53 };
  54
  55 /** Interface for shifting, exponentiating and summing the logits.
      *
      * The class is non-copyable (it holds raw tensor pointers) but movable.
      */
  56 class CLLogits1DShiftExpSumKernel : public ICLKernel
  57 {
  58 public:
  59     /** Default constructor */
  60     CLLogits1DShiftExpSumKernel();
  61     /** Prevent instances of this class from being copy-constructed (as this class contains pointers) */
  62     CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete;
  63     /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */
  64     CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete;
  65     /** Allow instances of this class to be move-constructed */
  66     CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default;
  67     /** Allow instances of this class to be move-assigned */
  68     CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
  69     /** Set the input and output tensors.
  70      *
  71      * @param[in]  input  Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32
  72      * @param[in]  max    Max values tensor. Data types supported: same as @p input
  73      * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
  74      * @param[out] sum    Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
  75      * @param[in]  beta   (Optional) A scaling factor for the exponent. Defaults to 1.0f
  76      */
  77     void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
  78     /** Static function to check if the given tensor info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel
  79      *
          * @note Unlike configure(), this function takes no @p beta argument.
          *
  80      * @param[in] input  Source tensor info. Data types supported: QS8/QASYMM8/QS16/F16/F32
  81      * @param[in] max    Max values tensor info. Data types supported: same as @p input
  82      * @param[in] output Destination tensor info. Data types supported: S32 for QASYMM8 @p input, or same as @p input
  83      * @param[in] sum    Sum of 1D logits tensor info. Data types supported: S32 for QASYMM8 @p input, or same as @p input
  84      *
  85      * @return a status
  86      */
  87     static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum);
  88
  89     // Inherited methods overridden:
         /** Run the kernel on the given window using the given OpenCL command queue.
          *
          * @param[in]     window Execution window.
          * @param[in,out] queue  OpenCL command queue.
          */
  90     void run(const Window &window, cl::CommandQueue &queue) override;
  91
  92 private:
  93     const ICLTensor *_input;  /**< Source tensor (presumably configure()'s @p input; confirm in the .cpp) */
  94     const ICLTensor *_max;    /**< Max values tensor (presumably configure()'s @p max; confirm in the .cpp) */
  95     ICLTensor       *_output; /**< Destination tensor (presumably configure()'s @p output; confirm in the .cpp) */
  96     ICLTensor       *_sum;    /**< Sum tensor (presumably configure()'s @p sum; confirm in the .cpp) */
  97 };
  98
  99 /** Interface for max, shifting, exponentiating and summing the logits.
      *
      * The class is non-copyable (it holds raw tensor pointers) but movable.
      */
 100 class CLLogits1DMaxShiftExpSumKernel : public ICLKernel
 101 {
 102 public:
 103     /** Info for whether a parallel reduction will be run and the vector size of the execution.
          *
          * First element: true if a parallel reduction will be run.
          * Second element: vector size of the execution.
          */
 104     using ParallelReductionInfo = std::tuple<bool, unsigned int>;
 105
 106 public:
 107     /** Default constructor */
 108     CLLogits1DMaxShiftExpSumKernel();
 109     /** Prevent instances of this class from being copy-constructed (as this class contains pointers) */
 110     CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete;
 111     /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */
 112     CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete;
 113     /** Allow instances of this class to be move-constructed */
 114     CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default;
 115     /** Allow instances of this class to be move-assigned */
 116     CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default;
 117     /** Set the input and output tensors.
 118      *
 119      * @param[in]     input  Source tensor. Data types supported: QS8/QS16/F16/F32
 120      * @param[in,out] max    Max values tensor. Data types supported: same as @p input
 121      * @param[out]    output Destination tensor. Data types supported: same as @p input
 122      * @param[out]    sum    Sum of 1D logits tensor. Data types supported: same as @p input
 123      * @param[in]     beta   (Optional) A scaling factor for the exponent. Defaults to 1.0f
 124      */
 125     void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
 126     /** Static function to check if the given tensor info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel
 127      *
          * @note Unlike configure(), this function takes no @p beta argument.
          *
 128      * @param[in] input  Source tensor info. Data types supported: QS8/QS16/F16/F32
 129      * @param[in] max    Max values tensor info. Data types supported: same as @p input
 130      * @param[in] output Destination tensor info. Data types supported: same as @p input
 131      * @param[in] sum    Sum of 1D logits tensor info. Data types supported: same as @p input
 132      *
 133      * @return a status
 134      */
 135     static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum);
 136     /** Checks if the given size is eligible for parallel reduction
 137      *
 138      * @note  Serial reduction is launched for width < (_grid_size * _serial_vector_size).
 139      * @note  Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4.
          * @note  NOTE(review): the forced vector size of 4 presumably corresponds to _parallel_vector_size, whose value
          *        is defined outside this header - confirm that the two agree.
 140      *
 141      * @param[in] size Size to check
 142      *
 143      * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run,
 144      *         while the second element is the vector size of the execution.
 145      */
 146     static ParallelReductionInfo is_parallel_reduction(size_t size);
 147
 148     // Inherited methods overridden:
         /** Run the kernel on the given window using the given OpenCL command queue.
          *
          * @param[in]     window Execution window.
          * @param[in,out] queue  OpenCL command queue.
          */
 149     void run(const Window &window, cl::CommandQueue &queue) override;
 150
 151 private:
 152     const ICLTensor *_input;  /**< Source tensor (presumably configure()'s @p input; confirm in the .cpp) */
 153     ICLTensor       *_max;    /**< Max values tensor (presumably configure()'s @p max; confirm in the .cpp) */
 154     ICLTensor       *_output; /**< Destination tensor (presumably configure()'s @p output; confirm in the .cpp) */
 155     ICLTensor       *_sum;    /**< Sum tensor (presumably configure()'s @p sum; confirm in the .cpp) */
 156
 157 private:
         // Tuning constants; their values are defined outside this header.
 158     static const unsigned int _grid_size;            /**< Factor of the serial/parallel width threshold (see is_parallel_reduction()) */
 159     static const unsigned int _serial_vector_size;   /**< Factor of the serial/parallel width threshold (see is_parallel_reduction()) */
 160     static const unsigned int _parallel_vector_size; /**< Presumably the vector size used for parallel reduction - confirm in the .cpp */
 161 };
 162 /** Interface for calculating the final step of the Softmax Layer.
      *
      * In this step each logit value is multiplied by the inverse of the sum of the logits.
      * The class is non-copyable (it holds raw tensor pointers) but movable.
      */
 163 class CLLogits1DNormKernel : public ICLKernel
 164 {
 165 public:
 166     /** Default constructor */
 167     CLLogits1DNormKernel();
 168     /** Prevent instances of this class from being copy-constructed (as this class contains pointers) */
 169     CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete;
 170     /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */
 171     CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete;
 172     /** Allow instances of this class to be move-constructed */
 173     CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default;
 174     /** Allow instances of this class to be move-assigned */
 175     CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
 176     /** Set the input and output tensors.
 177      *
 178      * @param[in]  input  Source tensor. Data types supported: QS8/QS16/S32/F16/F32
 179      * @param[in]  sum    Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
 180      * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
 181      * @param[in]  beta   (Optional) A scaling factor for the exponent. Defaults to 1.0f
 182      */
 183     void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, float beta = 1.0f);
 184     /** Static function to check if the given tensor info will lead to a valid configuration of @ref CLLogits1DNormKernel
 185      *
          * @note Unlike configure(), this function takes no @p beta argument.
          *
 186      * @param[in] input  Source tensor info. Data types supported: QS8/QS16/S32/F16/F32
 187      * @param[in] sum    Sum tensor info. Dimensions should be dim(input)-1. Data types supported: same as @p input
 188      * @param[in] output Destination tensor info. Data types supported: QASYMM8 for S32 @p input, or same as @p input
 189      *
 190      * @return a status
 191      */
 192     static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output);
 193
 194     // Inherited methods overridden:
         /** Run the kernel on the given window using the given OpenCL command queue.
          *
          * @param[in]     window Execution window.
          * @param[in,out] queue  OpenCL command queue.
          */
 195     void run(const Window &window, cl::CommandQueue &queue) override;
 196
 197 private:
 198     const ICLTensor *_input;  /**< Source tensor (presumably configure()'s @p input; confirm in the .cpp) */
 199     const ICLTensor *_sum;    /**< Sum tensor (presumably configure()'s @p sum; confirm in the .cpp) */
 200     ICLTensor       *_output; /**< Destination tensor (presumably configure()'s @p output; confirm in the .cpp) */
 201 };
 202 } // namespace arm_compute
 203 #endif /*__ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__ */