arm_compute v18.05
arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index ac5f4ca..d64fd9e 100644
@@ -26,6 +26,7 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
+#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
 #include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h"
@@ -36,6 +37,8 @@
 #include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/AssemblyHelper.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
 #include "arm_compute/runtime/Tensor.h"
@@ -94,13 +97,21 @@ private:
  * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
  * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8Scale (if quantized asymmetric)
  * -# @ref NECol2ImKernel
+ * -# @ref NEActivationLayer (executed only if the activation layer is enabled)
  */
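The step list above is the core idea of this function: convolution is lowered to one large matrix multiply. As a rough, library-independent illustration (plain scalar C++, single input and output channel, unit stride, no padding, no dilation; the helper name conv_via_gemm is made up for this sketch):

    #include <vector>

    // Scalar reference of the im2col + GEMM lowering (illustrative only;
    // the library runs optimised NEON kernels instead).
    std::vector<float> conv_via_gemm(const std::vector<float> &in, int W, int H,
                                     const std::vector<float> &w, int kW, int kH)
    {
        const int outW = W - kW + 1;
        const int outH = H - kH + 1;
        const int cols = outW * outH; // one column per output position

        // im2col: gather the kW*kH receptive field of every output position.
        std::vector<float> im2col(static_cast<size_t>(kW * kH) * cols);
        for(int y = 0; y < outH; ++y)
            for(int x = 0; x < outW; ++x)
                for(int ky = 0; ky < kH; ++ky)
                    for(int kx = 0; kx < kW; ++kx)
                        im2col[static_cast<size_t>(ky * kW + kx) * cols + (y * outW + x)] = in[(y + ky) * W + (x + kx)];

        // GEMM: (1 x kW*kH) weight row times (kW*kH x cols) patch matrix.
        std::vector<float> out(cols, 0.f);
        for(int k = 0; k < kW * kH; ++k)
            for(int i = 0; i < cols; ++i)
                out[i] += w[k] * im2col[static_cast<size_t>(k) * cols + i];

        return out; // col2im is trivial here: out is already [outW, outH]
    }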
 class NEGEMMConvolutionLayer : public IFunction
 {
 public:
     /** Constructor */
     NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
-
+    /** Prevent instances of this class from being copied (as this class contains pointers) */
+    NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
+    /** Default move constructor */
+    NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = default;
+    /** Prevent instances of this class from being copied (as this class contains pointers) */
+    NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete;
+    /** Default move assignment operator */
+    NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default;
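Because the function owns kernels, internal tensors and raw pointers (e.g. _original_weights below), copying is disabled and only moves are allowed. A minimal construction sketch, assuming the standard ACL memory-manager types (BlobLifetimeManager, PoolManager, MemoryManagerOnDemand) and omitting the manager finalisation boilerplate:

    #include <memory>
    #include <utility>

    #include "arm_compute/runtime/BlobLifetimeManager.h"
    #include "arm_compute/runtime/MemoryManagerOnDemand.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
    #include "arm_compute/runtime/PoolManager.h"

    using namespace arm_compute;

    void build_conv()
    {
        auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
        auto pool_mgr     = std::make_shared<PoolManager>();
        auto memory_mgr   = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

        NEGEMMConvolutionLayer conv(memory_mgr);       // internal tensors go through the manager
        NEGEMMConvolutionLayer conv2(std::move(conv)); // OK: move constructor is defaulted
        // NEGEMMConvolutionLayer conv3(conv2);        // ill-formed: copy constructor is deleted
    }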
     /** Set the input and output tensors.
      *
      * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -114,8 +125,11 @@ public:
      * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in]  weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer, the weights
      *                          tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+     * @param[in]  dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info     (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
      */
-    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
+    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
+                   const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
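For reference, a call that exercises both new optional parameters might look like the sketch below (shapes are illustrative; tensors still need to be allocated and filled before run()):

    using namespace arm_compute;

    Tensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 32U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(224U, 224U, 32U), 1, DataType::F32));

    NEGEMMConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst,
                   PadStrideInfo(1, 1, 1, 1), // stride (1,1), padding (1,1)
                   WeightsInfo(),
                   Size2D(1U, 1U),            // default dilation
                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)); // fused ReLU6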
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
      *
      * @param[in] input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -129,11 +143,13 @@ public:
      * @param[in] conv_info    Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer, the weights
      *                         tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+     * @param[in] dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in] act_info     (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
      *
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
-                           const WeightsInfo &weights_info = WeightsInfo());
+                           const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
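validate() mirrors configure() argument-for-argument, so unsupported setups can be rejected before any memory is touched. A typical guard, continuing the configure() snippet above (needs <iostream>):

    const Status status = NEGEMMConvolutionLayer::validate(src.info(), weights.info(), biases.info(), dst.info(),
                                                           PadStrideInfo(1, 1, 1, 1), WeightsInfo(), Size2D(1U, 1U),
                                                           ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    if(!bool(status))
    {
        std::cerr << status.error_description() << std::endl;
    }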
 
     // Inherited methods overridden:
     void run() override;
@@ -149,25 +165,21 @@ private:
  * @param[in]  reshape_info   (Optional) GEMM reshape info. If is_interleaved = true, this object must contain the information needed to understand how matrix A and matrix B have been reshaped
      */
     void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
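For context, the float/quantized split this helper hides boils down to a data-type dispatch. A simplified, non-authoritative sketch (member names as declared below; the real implementation also wires up quantization offsets and the output stage):

    // Hypothetical simplification of NEGEMMConvolutionLayer::configure_mm()
    void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *weights, ITensor *output,
                                              bool is_interleaved, const GEMMReshapeInfo &reshape_info)
    {
        if(_is_quantized)
        {
            // Quantized path: int32 accumulators, requantized afterwards by
            // _gemmlowp_output_stage.
            _mm_gemmlowp.configure(input, weights, output);
        }
        else
        {
            // Float path: single GEMM kernel, optionally on interleaved/
            // transposed inputs described by reshape_info.
            _mm_kernel.configure(input, weights, output, 1.f, is_interleaved, reshape_info);
        }
    }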
-    /** Prepare the appropriate assembly optimized kernel
-     *
-     * @param[in] ci CPU information
-     * @param[in] M  M parameter of matrix multiplication
-     * @param[in] N  N parameter of matrix multiplication
-     * @param[in] K  K parameter of matrix multiplication
-     */
-    void configure_asm_mm(const struct CPUInfo &ci, int M, int N, int K);
 
 private:
+    AssemblyKernelGlueF32                               _asm_glue;
     MemoryGroup                                         _memory_group;
     NEIm2ColKernel                                      _input_im2col_kernel;
     NEGEMMInterleave4x4Kernel                           _input_interleave_kernel;
     NEConvolutionLayerReshapeWeights                    _reshape_weights;
     NEGEMMMatrixMultiplyKernel                          _mm_kernel;
-    std::unique_ptr<NEGEMMAssemblyBaseKernel>           _mm_optimised_kernel;
     NEGEMMLowpMatrixMultiplyCore                        _mm_gemmlowp;
     NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
     NECol2ImKernel                                      _output_col2im_kernel;
+    NEActivationLayer                                   _activationlayer_function;
+    NEArithmeticAdditionKernel                          _add_bias_kernel;
+
+    const ITensor *_original_weights;
 
     Tensor _input_im2col_reshaped;
     Tensor _input_interleaved_reshaped;
@@ -175,12 +187,16 @@ private:
     Tensor _gemm_output;
     Tensor _tmp_output;
     Tensor _workspace;
+    Tensor _B_pretransposed;
 
-    bool _append_bias;
-    bool _is_fully_connected_convolution;
-    bool _are_weights_reshaped;
-    bool _is_quantized;
-    bool _is_interleaved;
+    DataLayout _data_layout;
+    bool       _append_bias;
+    bool       _is_fully_connected_convolution;
+    bool       _are_weights_reshaped;
+    bool       _is_quantized;
+    bool       _is_interleaved;
+    bool       _is_activationlayer_enabled;
+    bool       _skip_im2col;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H__ */
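Tying the pieces together, the usual lifecycle after configure() is allocate, fill, run; continuing the earlier snippet:

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src/weights/biases with data ...

    conv.run(); // im2col -> GEMM (+ bias) -> col2im -> fused activation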