arm_compute v18.05
arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index ac5f4ca..d64fd9e 100644
@@ -26,6 +26,7 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
+#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
 #include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h"
@@ -36,6 +37,8 @@
 #include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/AssemblyHelper.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
 #include "arm_compute/runtime/Tensor.h"
@@ -94,13 +97,21 @@ private:
  * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
  * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8Scale (if quantized asymmetric)
  * -# @ref NECol2ImKernel
+ * -# @ref NEActivationLayer (executed only if the activation layer is enabled)
  */
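The step list above is the core idea of this function: convolution is lowered to one large matrix multiply. As a rough, library-independent illustration (plain scalar C++, single input and output channel, unit stride, no padding, no dilation; the helper name conv_via_gemm is made up for this sketch):

    #include <vector>

    // Scalar reference of the im2col + GEMM lowering (illustrative only;
    // the library runs optimised NEON kernels instead).
    std::vector<float> conv_via_gemm(const std::vector<float> &in, int W, int H,
                                     const std::vector<float> &w, int kW, int kH)
    {
        const int outW = W - kW + 1;
        const int outH = H - kH + 1;
        const int cols = outW * outH; // one column per output position

        // im2col: gather the kW*kH receptive field of every output position.
        std::vector<float> im2col(static_cast<size_t>(kW * kH) * cols);
        for(int y = 0; y < outH; ++y)
            for(int x = 0; x < outW; ++x)
                for(int ky = 0; ky < kH; ++ky)
                    for(int kx = 0; kx < kW; ++kx)
                        im2col[static_cast<size_t>(ky * kW + kx) * cols + (y * outW + x)] = in[(y + ky) * W + (x + kx)];

        // GEMM: (1 x kW*kH) weight row times (kW*kH x cols) patch matrix.
        std::vector<float> out(cols, 0.f);
        for(int k = 0; k < kW * kH; ++k)
            for(int i = 0; i < cols; ++i)
                out[i] += w[k] * im2col[static_cast<size_t>(k) * cols + i];

        return out; // col2im is trivial here: out is already [outW, outH]
    }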
 class NEGEMMConvolutionLayer : public IFunction
 {
 public:
     /** Constructor */
     NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
-
+    /** Prevent instances of this class from being copied (as this class contains pointers) */
+    NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
+    /** Default move constructor */
+    NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = default;
+    /** Prevent instances of this class from being copied (as this class contains pointers) */
+    NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete;
+    /** Default move assignment operator */
+    NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default;
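Because the function owns kernels, internal tensors and raw pointers (e.g. _original_weights below), copying is disabled and only moves are allowed. A minimal construction sketch, assuming the standard ACL memory-manager types (BlobLifetimeManager, PoolManager, MemoryManagerOnDemand) and omitting the manager finalisation boilerplate:

    #include <memory>
    #include <utility>

    #include "arm_compute/runtime/BlobLifetimeManager.h"
    #include "arm_compute/runtime/MemoryManagerOnDemand.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
    #include "arm_compute/runtime/PoolManager.h"

    using namespace arm_compute;

    void build_conv()
    {
        auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
        auto pool_mgr     = std::make_shared<PoolManager>();
        auto memory_mgr   = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

        NEGEMMConvolutionLayer conv(memory_mgr);       // internal tensors go through the manager
        NEGEMMConvolutionLayer conv2(std::move(conv)); // OK: move constructor is defaulted
        // NEGEMMConvolutionLayer conv3(conv2);        // ill-formed: copy constructor is deleted
    }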
     /** Set the input and output tensors.
      *
      * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -114,8 +125,11 @@ public:
      * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in]  weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer, the weights
      *                          tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+     * @param[in]  dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info     (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
      */
-    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
+    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
+                   const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
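For reference, a call that exercises both new optional parameters might look like the sketch below (shapes are illustrative; tensors still need to be allocated and filled before run()):

    using namespace arm_compute;

    Tensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 32U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(224U, 224U, 32U), 1, DataType::F32));

    NEGEMMConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst,
                   PadStrideInfo(1, 1, 1, 1), // stride (1,1), padding (1,1)
                   WeightsInfo(),
                   Size2D(1U, 1U),            // default dilation
                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)); // fused ReLU6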
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
      *
      * @param[in] input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -129,11 +143,13 @@ public:
      * @param[in] conv_info    Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer, the weights
      *                         tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+     * @param[in] dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in] act_info     (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
      *
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
-                           const WeightsInfo &weights_info = WeightsInfo());
+                           const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
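validate() mirrors configure() argument-for-argument, so unsupported setups can be rejected before any memory is touched. A typical guard, continuing the configure() snippet above (needs <iostream>):

    const Status status = NEGEMMConvolutionLayer::validate(src.info(), weights.info(), biases.info(), dst.info(),
                                                           PadStrideInfo(1, 1, 1, 1), WeightsInfo(), Size2D(1U, 1U),
                                                           ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    if(!bool(status))
    {
        std::cerr << status.error_description() << std::endl;
    }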
 
     // Inherited methods overridden:
     void run() override;
@@ -149,25 +165,21 @@ private:
  * @param[in]  reshape_info   (Optional) GEMM reshape info. If is_interleaved = true, this object must contain the information needed to understand how matrix A and matrix B have been reshaped
      */
     void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
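For context, the float/quantized split this helper hides boils down to a data-type dispatch. A simplified, non-authoritative sketch (member names as declared below; the real implementation also wires up quantization offsets and the output stage):

    // Hypothetical simplification of NEGEMMConvolutionLayer::configure_mm()
    void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *weights, ITensor *output,
                                              bool is_interleaved, const GEMMReshapeInfo &reshape_info)
    {
        if(_is_quantized)
        {
            // Quantized path: int32 accumulators, requantized afterwards by
            // _gemmlowp_output_stage.
            _mm_gemmlowp.configure(input, weights, output);
        }
        else
        {
            // Float path: single GEMM kernel, optionally on interleaved/
            // transposed inputs described by reshape_info.
            _mm_kernel.configure(input, weights, output, 1.f, is_interleaved, reshape_info);
        }
    }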
-    /** Prepare the appropriate assembly optimized kernel
-     *
-     * @param[in] ci CPU information
-     * @param[in] M  M parameter of matrix multiplication
-     * @param[in] N  N parameter of matrix multiplication
-     * @param[in] K  K parameter of matrix multiplication
-     */
-    void configure_asm_mm(const struct CPUInfo &ci, int M, int N, int K);
 
 private:
+    AssemblyKernelGlueF32                               _asm_glue;
     MemoryGroup                                         _memory_group;
     NEIm2ColKernel                                      _input_im2col_kernel;
     NEGEMMInterleave4x4Kernel                           _input_interleave_kernel;
     NEConvolutionLayerReshapeWeights                    _reshape_weights;
     NEGEMMMatrixMultiplyKernel                          _mm_kernel;
-    std::unique_ptr<NEGEMMAssemblyBaseKernel>           _mm_optimised_kernel;
     NEGEMMLowpMatrixMultiplyCore                        _mm_gemmlowp;
     NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
     NECol2ImKernel                                      _output_col2im_kernel;
+    NEActivationLayer                                   _activationlayer_function;
+    NEArithmeticAdditionKernel                          _add_bias_kernel;
+
+    const ITensor *_original_weights;
 
     Tensor _input_im2col_reshaped;
     Tensor _input_interleaved_reshaped;
@@ -175,12 +187,16 @@ private:
     Tensor _gemm_output;
     Tensor _tmp_output;
     Tensor _workspace;
+    Tensor _B_pretransposed;
 
-    bool _append_bias;
-    bool _is_fully_connected_convolution;
-    bool _are_weights_reshaped;
-    bool _is_quantized;
-    bool _is_interleaved;
+    DataLayout _data_layout;
+    bool       _append_bias;
+    bool       _is_fully_connected_convolution;
+    bool       _are_weights_reshaped;
+    bool       _is_quantized;
+    bool       _is_interleaved;
+    bool       _is_activationlayer_enabled;
+    bool       _skip_im2col;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H__ */
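Tying the pieces together, the usual lifecycle after configure() is allocate, fill, run; continuing the earlier snippet:

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src/weights/biases with data ...

    conv.run(); // im2col -> GEMM (+ bias) -> col2im -> fused activation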