COMPMID-3097 Fuse activation with fully connected layer CL
author    Giorgio Arena <giorgio.arena@arm.com>
Fri, 7 Feb 2020 13:46:45 +0000 (13:46 +0000)
committer Giorgio Arena <giorgio.arena@arm.com>
Mon, 2 Mar 2020 15:51:39 +0000 (15:51 +0000)
Change-Id: I447030e69b9e565f2f81529a41af8c5e7ece7ecf
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2702
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
26 files changed:
arm_compute/core/PixelValue.h
arm_compute/core/Types.h
arm_compute/graph/nodes/FullyConnectedLayerNode.h
arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp
src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp
src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp
src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
src/graph/mutators/NodeFusionMutator.cpp
src/graph/nodes/FullyConnectedLayer.cpp
src/runtime/CL/functions/CLFullyConnectedLayer.cpp
src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
tests/validation/CL/FullyConnectedLayer.cpp
tests/validation/CL/GEMMLowp.cpp
tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp
tests/validation/NEON/FullyConnectedLayer.cpp
tests/validation/NEON/GEMMLowp.cpp
tests/validation/fixtures/FullyConnectedLayerFixture.h

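This patch fuses the activation function into the CL fully connected layer, so the activation no longer runs as a separate kernel after the GEMM. A minimal sketch of requesting the fusion through the runtime API (the tensors and the ReLU6 parameters are illustrative, not part of this patch):

    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"

    using namespace arm_compute;

    CLTensor src, weights, bias, dst; // assumed initialised and allocated elsewhere
    FullyConnectedLayerInfo fc_info;
    fc_info.activation_info = ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);

    CLFullyConnectedLayer fc;
    fc.configure(&src, &weights, &bias, &dst, fc_info); // matrix multiply and ReLU6 in one fused pass
    fc.run();
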
index c5f6608163a88e5af3a891202e9545c497afcc9f..31bc55098a4c0e24c44209cd7ef47266a515e479 100644 (file)
@@ -41,11 +41,11 @@ public:
     }
     /** Initialize the union with a pixel value of chosen datatype
      *
-     * @param[in] v        int value.
+     * @param[in] v        value.
     * @param[in] datatype DataType that @p v has to be stored as
     * @param[in] qinfo    (Optional) QuantizationInfo to apply to @p v in case of quantized data types
      */
-    PixelValue(int64_t v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
+    PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
         : PixelValue()
     {
         switch(datatype)
@@ -57,13 +57,13 @@ public:
                 value.s8 = static_cast<int8_t>(v);
                 break;
             case DataType::QASYMM8:
-                value.u8 = quantize_qasymm8(static_cast<uint8_t>(v), qinfo);
+                value.u8 = quantize_qasymm8(static_cast<float>(v), qinfo);
                 break;
             case DataType::QASYMM8_SIGNED:
-                value.s8 = quantize_qasymm8_signed(static_cast<int8_t>(v), qinfo);
+                value.s8 = quantize_qasymm8_signed(static_cast<float>(v), qinfo);
                 break;
             case DataType::QSYMM8:
-                value.s8 = quantize_qsymm8(static_cast<int8_t>(v), qinfo);
+                value.s8 = quantize_qsymm8(static_cast<float>(v), qinfo);
                 break;
             case DataType::U16:
                 value.u16 = static_cast<uint16_t>(v);
@@ -72,10 +72,10 @@ public:
                 value.s16 = static_cast<int16_t>(v);
                 break;
             case DataType::QASYMM16:
-                value.u16 = quantize_qasymm16(static_cast<uint16_t>(v), qinfo);
+                value.u16 = quantize_qasymm16(static_cast<float>(v), qinfo);
                 break;
             case DataType::QSYMM16:
-                value.s16 = quantize_qsymm16(static_cast<int16_t>(v), qinfo);
+                value.s16 = quantize_qsymm16(static_cast<float>(v), qinfo);
                 break;
             case DataType::U32:
                 value.u32 = static_cast<uint32_t>(v);
@@ -96,10 +96,8 @@ public:
                 value.f32 = static_cast<float>(v);
                 break;
             case DataType::F64:
-                value.f64 = static_cast<double>(v);
-                break;
             default:
-                value.s64 = v;
+                value.f64 = v;
                 break;
         }
     }
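The constructor now takes a double, so a single broadcast constant can represent integer and floating-point values alike, and quantized types quantize the real value instead of first truncating it to an integer. A short usage sketch (the quantization parameters are illustrative):

    const QuantizationInfo qinfo(0.5f, 10);
    PixelValue q_zero(0.0, DataType::QASYMM8, qinfo); // stores quantize_qasymm8(0.f, qinfo), i.e. 10
    PixelValue minus_one(-1.0, DataType::F32);        // stores -1.f directly
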
index 2030b171c61c8ca8517bb057d1a104ed8087f309..cf689d757c617808768fa7565783cca6d7bff3bd 100644 (file)
@@ -799,39 +799,6 @@ private:
     DimensionRoundingType _round_type;
 };
 
-/** Fully connected layer info */
-struct FullyConnectedLayerInfo
-{
-    DataLayout weights_trained_layout{ DataLayout::NCHW }; /**<  Layout that the weights have been trained with. */
-    bool       transpose_weights{ true };                  /**<  Transpose weights if true. */
-    bool       are_weights_reshaped{ false };              /**<  Reshape the weights tensor if false. */
-    bool       retain_internal_weights{ false };           /**<  Retain internal reshaped weights. */
-    bool       fp_mixed_precision{ false };                /**<  Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
-
-    /** Sets the weights trained data layout
-     *
-     * @param[in] layout Data layout that the weights were trained with
-     *
-     * @return Updated object
-     */
-    FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
-    {
-        weights_trained_layout = layout;
-        return *this;
-    }
-    /** Sets the transpose weights flag
-     *
-     * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
-     *
-     * @return Updated object
-     */
-    FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
-    {
-        transpose_weights = should_transpose_weights;
-        return *this;
-    }
-};
-
 /** PriorBox layer info */
 class PriorBoxLayerInfo final
 {
@@ -1674,6 +1641,40 @@ private:
     bool               _enabled = { false };
 };
 
+/** Fully connected layer info */
+struct FullyConnectedLayerInfo
+{
+    DataLayout          weights_trained_layout{ DataLayout::NCHW }; /**<  Layout that the weights have been trained with. */
+    bool                transpose_weights{ true };                  /**<  Transpose weights if true. */
+    bool                are_weights_reshaped{ false };              /**<  Reshape the weights tensor if false. */
+    bool                retain_internal_weights{ false };           /**<  Retain internal reshaped weights. */
+    bool                fp_mixed_precision{ false };                /**<  Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
+    ActivationLayerInfo activation_info{};                          /**<  Fused activation to apply after the matrix multiplication. */
+
+    /** Sets the weights trained data layout
+     *
+     * @param[in] layout Data layout that the weights were trained with
+     *
+     * @return Updated object
+     */
+    FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
+    {
+        weights_trained_layout = layout;
+        return *this;
+    }
+    /** Sets the transpose weights flag
+     *
+     * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
+     *
+     * @return Updated object
+     */
+    FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
+    {
+        transpose_weights = should_transpose_weights;
+        return *this;
+    }
+};
+
 /** Normalization Layer Information class */
 class NormalizationLayerInfo
 {
@@ -1944,16 +1945,16 @@ enum class GEMMLowpOutputStageType
 /** GEMMLowp output stage info */
 struct GEMMLowpOutputStageInfo
 {
-    GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
-    int32_t                 gemmlowp_offset{ 0 };                  /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
-    int32_t                 gemmlowp_multiplier{ 0 };              /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    int32_t                 gemmlowp_shift{ 0 };                   /**< GEMMLowp output stage shift used for quantizing to uint8 */
-    int32_t                 gemmlowp_min_bound{ 0 };               /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
-    int32_t                 gemmlowp_max_bound{ 0 };               /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
-    std::vector<int32_t>    gemmlowp_multipliers{};                /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    std::vector<int32_t>    gemmlowp_shifts{};                     /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
-    bool                    is_quantized_per_channel{ false };     /**< GEMMLowp quantized per-channel flag */
-    DataType                output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+    GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE };                        /**< GEMMLowp output stage type */
+    int32_t                 gemmlowp_offset{ 0 };                                         /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
+    int32_t                 gemmlowp_multiplier{ 0 };                                     /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
+    int32_t                 gemmlowp_shift{ 0 };                                          /**< GEMMLowp output stage shift used for quantizing to uint8 */
+    int32_t                 gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
+    int32_t                 gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() };    /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
+    std::vector<int32_t>    gemmlowp_multipliers{};                                       /**< GEMMLowp output stage multipliers used for quantizing to QASYMM8 */
+    std::vector<int32_t>    gemmlowp_shifts{};                                            /**< GEMMLowp output stage shifts used for quantizing to QASYMM8 */
+    bool                    is_quantized_per_channel{ false };                            /**< GEMMLowp quantized per-channel flag */
+    DataType                output_data_type{ DataType::UNKNOWN };                        /**< Output tensor data type to use if the output is not initialized */
 };
 
 /** GEMM LHS (Left Hand Side) matrix information */
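FullyConnectedLayerInfo moves below ActivationLayerInfo so that it can carry a fused activation, and the GEMMLowp clamp bounds now default to the full int32 range, so "no clamping" no longer depends on the old 0/0 sentinel. A sketch of populating the new fields (the activation choice is illustrative):

    FullyConnectedLayerInfo fc_info;
    fc_info.set_weights_trained_layout(DataLayout::NHWC).set_transpose_weights(false);
    fc_info.activation_info = ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU);

    GEMMLowpOutputStageInfo stage_info{};
    // stage_info.gemmlowp_min_bound == std::numeric_limits<int32_t>::lowest() -> no lower clamp
    // stage_info.gemmlowp_max_bound == std::numeric_limits<int32_t>::max()    -> no upper clamp
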
index 41a7bc994686d66e3628341f48455d650b1d19bf..10c310dda2feb2875a434b322c7853cccf51c4b6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,11 @@ public:
     FullyConnectedLayerNode(unsigned int            num_outputs,
                             QuantizationInfo        out_quant_info = QuantizationInfo(),
                             FullyConnectedLayerInfo fc_info        = FullyConnectedLayerInfo());
+    /** Sets fused activation
+     *
+     * @param[in] fused_activation Fused activation to set
+     */
+    void set_fused_activation(ActivationLayerInfo fused_activation);
     /** Computes weights descriptor
      *
      * @warning Works for inputs with 1D batch space
@@ -83,6 +88,8 @@ public:
     TensorDescriptor configure_output(size_t idx) const override;
     void accept(INodeVisitor &v) override;
 
+    static constexpr NodeType node_type = NodeType::FullyConnectedLayer;
+
 private:
     unsigned int            _num_outputs;
     QuantizationInfo        _out_quant_info;
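The static node_type member lets the graph fusion mutator match fully connected nodes generically. A simplified sketch of the matching step (everything except set_fused_activation and node_type is an assumption, not the actual NodeFusionMutator code):

    // If a fully connected node feeds an activation node, absorb the activation into the FC node.
    if(node->type() == FullyConnectedLayerNode::node_type)
    {
        auto *fc_node = arm_compute::utils::cast::polymorphic_downcast<FullyConnectedLayerNode *>(node);
        fc_node->set_fused_activation(act_info); // act_info comes from the following ActivationLayerNode
    }
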
index f453879fd8843bf2944d1a9cf5b71cbf89fa179f..564135eed866a96c50ef13dba3cf9ff0ba91db60 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,24 +68,25 @@ public:
      * @param[in]  result_offset   Offset to be added to each element of the input matrix
     * @param[in]  result_mult_int Value to be multiplied with each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift    Number of bits to shift right the result before converting back to QASYMM8
-     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max             (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = 0, int max = 0);
+    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
 /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
@@ -128,25 +129,25 @@ public:
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
      * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
-                   int min = 0, int max = 0);
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
 /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on OpenCL.
@@ -189,25 +190,25 @@ public:
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
      * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
-                   int min = 0, int max = 0);
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
-     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
 /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat on OpenCL.
@@ -230,24 +231,25 @@ public:
     * @param[out] output     Output tensor. Data type supported: QASYMM8
      * @param[in]  multiplier Float multiplier to be multiplied to each element of the input matrix
      * @param[in]  offset     Offset to be applied to result before converting it back to QASYMM8
-     * @param[in]  min        (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min        (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max        (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                        Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                        Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = 0, int max = 0);
+    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 /** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL.
  *
@@ -288,24 +290,25 @@ public:
     * @param[out] output                       Output tensor. Data type supported: QSYMM16
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
      *
      * @param[in] input  Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor info. Data type supported: QSYMM16
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
-     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H */
\ No newline at end of file
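With the new defaults, omitting min/max now means "no clamping", so callers that used to pass the 0/0 sentinel should simply omit the arguments. A sketch against the header above (the tensor names and requantization parameters are assumptions):

    CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint output_stage;
    // Defaults now span the whole int32 range, so this configures an unclamped requantization:
    output_stage.configure(&gemm_s32_out, &bias, &dst_qasymm8, result_fixedpoint_multiplier, result_shift, result_offset_after_shift);
    // A fused bounded ReLU would instead pass explicit bounds, e.g. min = 10, max = 250.
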
index ca2cbbc268395e8a2f16d3481b38f07ec496e62e..283b052917c8d6b46f107b60fd0635ced55f6e16 100644 (file)
@@ -68,24 +68,25 @@ public:
      * @param[in]  result_offset   Offset to be added to each element of the input matrix
     * @param[in]  result_mult_int Value to be multiplied with each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift    Number of bits to shift right the result before converting back to QASYMM8
-     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max             (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = 0, int max = 0);
+    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8Scale
      *
      * @param[in] input  Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
 /** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON.
@@ -128,24 +129,25 @@ public:
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
      * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
-     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 /** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on NEON.
  *
@@ -187,24 +189,25 @@ public:
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
      * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
-     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 /** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on NEON.
  *
@@ -245,24 +248,25 @@ public:
     * @param[out] output                       Output tensor. Data type supported: QSYMM16
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix once the result_offset has been added
      * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
-     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
      * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
-     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      */
-    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+    void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor info. It is the output of @ref NEGEMMLowpMatrixMultiplyCore function. Data type supported: S32
      * @param[in] bias   Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
      *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor info. Data type supported: QSYMM16
-     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
      * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
-     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+     *                            Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
 /** Basic function to execute GEMMLowpQuantizeDown kernels on NEON.
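The NEON functions take the same defaults, so a fused activation is expressed purely through tighter bounds. A hedged sketch of deriving ReLU bounds for a QASYMM8 output (the variable names are assumptions):

    const UniformQuantizationInfo oq_info = dst.info()->quantization_info().uniform();
    const int32_t relu_min = quantize_qasymm8(0.f, oq_info);      // clamp at the real value 0
    const int32_t relu_max = std::numeric_limits<int32_t>::max(); // leave the top unbounded
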
index 5550003f33fcdaa19981d31bdc0607cf693bc8c2..b9563553b8d38e353867d44b1b76db41e6e88f72 100644 (file)
@@ -113,22 +113,9 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto
         ARM_COMPUTE_RETURN_ERROR_ON(output_stage.output_data_type != output->data_type());
         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mm_result, output);
-        PixelValue min_val{};
-        PixelValue max_val{};
-        std::tie(min_val, max_val) = get_min_max(output->data_type());
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > max_val.get<int32_t>());
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < min_val.get<int32_t>() || output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
-    }
-    else
-    {
-        // Output will be configured as depending on the chosen output data type in the output stage
-        PixelValue min_val{};
-        PixelValue max_val{};
-        std::tie(min_val, max_val) = get_min_max(output_stage.output_data_type);
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > max_val.get<int32_t>());
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < min_val.get<int32_t>() || output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
     }
 
+    ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_stage.gemmlowp_multipliers.size() != output_stage.gemmlowp_shifts.size(), "per channel quantization info is incorrect");
 
     return Status{};
@@ -248,8 +235,8 @@ void CLGEMMLowpOffsetContributionOutputStageKernel::configure(const ICLTensor *m
     PixelValue min_val{};
     PixelValue max_val{};
     std::tie(min_val, max_val) = get_min_max(output->info()->data_type());
-    build_opts.add_option_if((min != min_val.get<int32_t>()) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != max_val.get<int32_t>()) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > min_val.get<int32_t>()), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < max_val.get<int32_t>()), "-DMAX_BOUND=" + support::cpp11::to_string(max));
 
     std::string kernel_name("gemmlowp_offset_contribution");
     kernel_name += "_" + string_from_gemmlowp_output_stage(output_stage.type);
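The MIN_BOUND/MAX_BOUND build options are now emitted only when a bound actually tightens the output type's range, replacing the old min != max sentinel checks. For a QASYMM8 output, whose representable range is [0, 255], the predicate reduces to:

    const bool needs_min_clamp = (min > 0);   // -DMIN_BOUND only if tighter than the type minimum
    const bool needs_max_clamp = (max < 255); // -DMAX_BOUND only if tighter than the type maximum
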
index 872012336658390509ab9f82586fbf17c5b2a3c2..1bc7fe3946ba9b5d160c66751d4582f0294b53a1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,8 +45,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
                           int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 32767);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < -32768 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -137,8 +136,8 @@ void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const
     CLBuildOptions build_opts;
     build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier));
     build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
-    build_opts.add_option_if((min != -32768) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != 32767) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > -32768), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < 32767), "-DMAX_BOUND=" + support::cpp11::to_string(max));
     build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
 
     // Create kernel
index 8a5ce9fa8788e0612daaf603f909053b795c94c4..e207fcb1b0745f081ad84e5b7f446ff82286027f 100644 (file)
@@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
                           int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 127);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < -128 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -136,8 +135,8 @@ void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const I
     build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier));
     build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
     build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
-    build_opts.add_option_if((min != -128) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != 127) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > -128), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < 127), "-DMAX_BOUND=" + support::cpp11::to_string(max));
     build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
 
     // Create kernel
index dc04fed96ea52693b33c635e030c807916244a79..7601d7ee779b3c53a01e4553053ce2a68631c9a6 100644 (file)
@@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
                           int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -136,8 +135,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const
     build_opts.add_option("-DRESULT_FIXEDPOINT_MULTIPLIER=" + support::cpp11::to_string(result_fixedpoint_multiplier));
     build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
     build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
-    build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max));
     build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
 
     // Create kernel
index ae096f295ce32716c36a9f2a7aadefdb9c8110c8..dd1be748f58856bc07ceb0149131c138314a0638 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,8 +43,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
                           int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -132,8 +131,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel::configure(const ICLTe
     CLBuildOptions build_opts;
     build_opts.add_option("-DREAL_MULTIPLIER=" + float_to_string_with_full_precision(multiplier));
     build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(offset));
-    build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max));
     build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
 
     // Create kernel
index 8175f6027584ee40fc65309eec745becb08a109a..7a22239a7c23329538804a2c5fd56e14d52d567b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,8 +41,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -135,8 +134,8 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ICLTensor *i
     build_opts.add_option("-DRESULT_OFFSET=" + support::cpp11::to_string(result_offset));
     build_opts.add_option("-DRESULT_MULT_INT=" + support::cpp11::to_string(result_mult_int));
     build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(result_shift));
-    build_opts.add_option_if((min != 0) && (min != max), "-DMIN_BOUND=" + support::cpp11::to_string(min));
-    build_opts.add_option_if((max != 255) && (min != max), "-DMAX_BOUND=" + support::cpp11::to_string(max));
+    build_opts.add_option_if((min > 0), "-DMIN_BOUND=" + support::cpp11::to_string(min));
+    build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max));
     build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
 
     // Create kernel
index 5d2df6d2c904afbdee98e5e39674645b4750f44d..31414e3f3f53d4da289926821971abddba878592 100644 (file)
@@ -778,15 +778,8 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto
                           int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(mm_result, 1, DataType::S32);
-    if(output->data_type() == DataType::QASYMM8)
+    if(output->data_type() != DataType::QASYMM8)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > 255);
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < 0);
-    }
-    else
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_max_bound > 127);
-        ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound < -128);
         ARM_COMPUTE_RETURN_ERROR_ON(mm_result->dimension(0) > 1 && output_stage.gemmlowp_multipliers.size() > 1 && b_offset != 0);
     }
     ARM_COMPUTE_RETURN_ERROR_ON(output_stage.gemmlowp_min_bound > output_stage.gemmlowp_max_bound);
@@ -914,7 +907,7 @@ get_configured_function(const ITensor *mm_result, const ITensor *vector_sum_row,
     std::tie(type_min, type_max) = get_min_max(output->info()->data_type());
     int32_t    type_min_int    = type_min.get<int32_t>();
     int32_t    type_max_int    = type_max.get<int32_t>();
-    const bool is_bounded_relu = !(output_stage.gemmlowp_min_bound == type_min_int && output_stage.gemmlowp_max_bound == type_max_int);
+    const bool is_bounded_relu = !(output_stage.gemmlowp_min_bound <= type_min_int && output_stage.gemmlowp_max_bound >= type_max_int);
 
     // Check if we need to perform fixed point requantization
     const bool is_fixed_point = output_stage.type != GEMMLowpOutputStageType::QUANTIZE_DOWN;
index bc513e66184117359f028fab928bf2e6bb20ce43..058007139d2feccf7892e3b8c740e1657a918f9d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,8 +46,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 32767);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < -32768 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -213,7 +212,7 @@ void NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const
     INEKernel::configure(win_config.second);
 
     // Check if we need to clamp the result using min and max
-    const bool is_bounded_relu = ((min != max) && !(min == -32768 && max == 32767));
+    const bool is_bounded_relu = !(min <= -32768 && max >= 32767);
     _func                      = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::run<false>;
 }
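Editor's note: the hard range checks (max > 32767, min < -32768) are dropped here and in the int8/uint8 kernels below. The likely rationale is that bounds wider than the output type are harmless, since the saturating narrowing already clamps to the type range, leaving min > max as the only genuinely invalid input. A scalar sketch of that reasoning (hypothetical helper, requantization details elided):

    #include <algorithm>
    #include <cstdint>

    // Wider user bounds simply collapse into the saturation step.
    inline int16_t finalize_s32_to_s16(int32_t v, int32_t min_b, int32_t max_b)
    {
        v = std::min(std::max(v, min_b), max_b);  // skipped when bounds cover [-32768, 32767]
        v = std::min(std::max(v, -32768), 32767); // saturating narrow, always applied
        return static_cast<int16_t>(v);
    }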
 
index d24089d6155064afdea2b68c5c05defc8309b49c..b8ca17ec3d643852f25a5f96ebbe071f3cb65d35 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,8 +46,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 127);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < -128 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -222,7 +221,7 @@ void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const I
     INEKernel::configure(win_config.second);
 
     // Check if we need to clamp the result using min and max
-    const bool is_bounded_relu = ((min != max) && !(min == -128 && max == 127));
+    const bool is_bounded_relu = !(min <= -128 && max >= 127);
     _func                      = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<false>;
 }
 
index bb0b86404e317edbf3a5060f1092f7c9af0938fa..4a9d2f748148971584a07cc063785421671e9dfe 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,8 +46,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -224,7 +223,7 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const
     INEKernel::configure(win_config.second);
 
     // Check if we need to clamp the result using min and max
-    const bool is_bounded_relu = ((min != max) && !(min == 0 && max == 255));
+    const bool is_bounded_relu = !(min <= 0 && max >= 255);
     _func                      = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run<false>;
 }
 
index a221bd7925e1aa2ea49d638c6636ab8250b93898..a68e4e7efb7f8521ca00bd0dade4483a06a3f48b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,8 +43,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
-    ARM_COMPUTE_RETURN_ERROR_ON(max > 255);
-    ARM_COMPUTE_RETURN_ERROR_ON(min < 0 || min > max);
+    ARM_COMPUTE_RETURN_ERROR_ON(min > max);
 
     // Check biases if exist
     if(bias != nullptr)
@@ -324,7 +323,7 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ITensor *inp
     INEKernel::configure(win_config.second);
 
     // Check if we need to clamp the result using min and max
-    const bool is_bounded_relu = ((min != max) && !(min == 0 && max == 255));
+    const bool is_bounded_relu = !(min <= 0 && max >= 255);
     _func                      = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run<false>;
 }
 
index b7f081dc426ee37f470e1f81824b4ca62f69297c..151a8bfa03c0d84e289983fe6469b20ff166e2a8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -301,6 +301,10 @@ void NodeFusionMutator::mutate(Graph &g)
     {
         return true;
     };
+    auto cl_target_prec = [](INode & n)
+    {
+        return n.assigned_target() == Target::CL;
+    };
     auto qs8_prec = [&g](INode & n)
     {
         ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr);
@@ -318,6 +322,7 @@ void NodeFusionMutator::mutate(Graph &g)
     detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
     detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
     detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
+    detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations);
     detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization);
     detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
 }
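Editor's note: fuse_layer folds the activation into the producer node only when its precondition functor returns true; the new cl_target_prec gate restricts FullyConnectedLayer + Activation fusion to CL, the only backend with the fused path in this change. An illustrative precondition combining target and type checks (not part of this commit):

    // Hypothetical: fuse only CL nodes producing F32 output.
    auto cl_fp32_prec = [](INode & n)
    {
        ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr);
        return n.assigned_target() == Target::CL && n.output(0)->desc().data_type == DataType::F32;
    };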
index 80fce7b8a13e74d1f4fb8462e5022e0e1308db92..34c432a1ceab56fc0a305891173c5f4487f31b75 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,6 +38,11 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, Quant
     _outputs.resize(1, NullTensorID);
 }
 
+void FullyConnectedLayerNode::set_fused_activation(ActivationLayerInfo fused_activation)
+{
+    _info.activation_info = fused_activation;
+}
+
 TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor,
                                                                      unsigned int            num_outputs,
                                                                      FullyConnectedLayerInfo fc_info,
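Editor's note: this setter is the hook the generic fusion helper needs; detail::fuse_node_with_activation<FullyConnectedLayerNode> can now write the activation into the node's FullyConnectedLayerInfo before the ActivationLayerNode is removed. A simplified sketch of what the helper presumably does (not its actual body):

    // fc_node and act_node are the matched producer/consumer pair.
    fc_node->set_fused_activation(act_node->activation_info());
    // ...the activation node is then detached and the FC output rewired in its place.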
index dcaa12645eb327f80f4b482f192fad4edcd7c7b4..9b7de8df1bc34de6331e664651a19a0d18e03489 100644 (file)
@@ -41,7 +41,7 @@ using namespace arm_compute::utils::cast;
 namespace
 {
 Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output,
-                                       GEMMLowpOutputStageInfo &gemmlowp_output_stage)
+                                       GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info)
 {
     gemmlowp_output_stage.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
     gemmlowp_output_stage.gemmlowp_offset     = 0;
@@ -53,13 +53,14 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
     // Configure output stage for quantized case
     if(is_data_type_quantized_asymmetric(data_type))
     {
-        const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
-        const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
-        const UniformQuantizationInfo oq_info = output.quantization_info().uniform();
+        const QuantizationInfo        oq_info = output.quantization_info();
+        const UniformQuantizationInfo iq_unif = input.quantization_info().uniform();
+        const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
+        const UniformQuantizationInfo oq_unif = oq_info.uniform();
 
-        const auto output_quant_info = (output.total_size() == 0) ? iq_info : oq_info;
+        const auto output_quant_info = (output.total_size() == 0) ? iq_unif : oq_unif;
 
-        const float multiplier        = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
+        const float multiplier        = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
         int         output_multiplier = 0;
         int         output_shift      = 0;
         ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
@@ -68,6 +69,27 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
         PixelValue type_max{};
         std::tie(type_min, type_max) = get_min_max(data_type);
 
+        if(activation_info.enabled())
+        {
+            switch(activation_info.activation())
+            {
+                case ActivationLayerInfo::ActivationFunction::RELU:
+                    type_min = PixelValue(oq_unif.offset);
+                    break;
+                case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+                    type_min = PixelValue(oq_unif.offset);
+                    type_max = PixelValue(activation_info.a(), data_type, oq_info);
+                    break;
+                case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+                    type_min = PixelValue(activation_info.b(), data_type, oq_info);
+                    type_max = PixelValue(activation_info.a(), data_type, oq_info);
+                    break;
+                default:
+                    ARM_COMPUTE_ERROR("Activation function not supported.");
+                    break;
+            }
+        }
+
         // Set the GEMMLowp output stage info
         gemmlowp_output_stage.gemmlowp_offset     = output_quant_info.offset;
         gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
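Editor's note: for quantized outputs the fused activation is expressed purely as tighter requantization bounds: RELU pins the minimum at the zero point (real 0), while BOUNDED_RELU and LU_BOUNDED_RELU quantize a() and b() with the output quantization info. A worked example with assumed values scale = 0.1, offset = 10 on QASYMM8:

    // BOUNDED_RELU with a = 6.0f:
    //   type_min = offset = 10                    -> real value 0.0
    //   type_max = quantize(6.0, {0.1, 10}) = 70  -> round(6.0 / 0.1) + 10
    // The GEMMLowp output stage then clamps to [10, 70] instead of [0, 255].
    PixelValue lo(10);                                                 // oq_unif.offset
    PixelValue hi(6.0, DataType::QASYMM8, QuantizationInfo(0.1f, 10)); // == 70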
@@ -84,7 +106,7 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
 Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info)
 {
     GEMMLowpOutputStageInfo gemmlowp_output_stage;
-    ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
+    ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage, fc_info.activation_info));
 
     const GEMMInfo &gemm_info = GEMMInfo(false,                           // is_a_reshaped
                                          false,                           // is_b_reshaped
@@ -144,7 +166,7 @@ CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> mem
 void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info)
 {
     GEMMLowpOutputStageInfo gemmlowp_output_stage;
-    construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage);
+    construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage, fc_info.activation_info);
 
     const GEMMInfo &gemm_info = GEMMInfo(false,                           // is_a_reshaped
                                          false,                           // is_b_reshaped
@@ -155,7 +177,7 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor
                                          gemmlowp_output_stage,           // gemmlowp_output_stage
                                          fc_info.fp_mixed_precision,      // fp_mixed_precision
                                          true,                            // broadcast_bias
-                                         ActivationLayerInfo());          // activation_info
+                                         fc_info.activation_info);        // activation_info
 
     if(_is_quantized)
     {
@@ -313,6 +335,8 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
     ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
+    ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(input->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU
+                                && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
 
     bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
     bool is_fc_after_conv = true;
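Editor's note: validate() now rejects fused activations that cannot be folded into the requantization clamp; for quantized inputs only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are accepted, while the float path hands the activation to GEMM unchanged. A usage sketch (tensor setup elided):

    FullyConnectedLayerInfo fc_info{};
    fc_info.activation_info = ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);

    CLFullyConnectedLayer fc;
    fc.configure(&input, &weights, &bias, &output, fc_info); // OK for QASYMM8
    // A TANH here would trip the new ERROR_ON for quantized inputs, but is fine for F32.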
index 682812b1c88eb072b0928dddee40e9fb87d4ccc8..5398050533ad6d462b9f4b9a801da505fa61f491 100644 (file)
@@ -333,8 +333,12 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
         gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0];
         gemmlowp_output_stage.gemmlowp_shift      = gemmlowp_output_stage.gemmlowp_shifts[0];
 
-        int min_activation = 0;
-        int max_activation = 0;
+        PixelValue min_val{};
+        PixelValue max_val{};
+        std::tie(min_val, max_val) = get_min_max(output->info()->data_type());
+
+        auto min_activation = min_val.get<int32_t>();
+        auto max_activation = max_val.get<int32_t>();
 
         const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
                                                                                    ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
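Editor's note: instead of the old {0, 0} sentinel, the activation bounds now start from the full range of the output type (via get_min_max), so a missing or full-range activation naturally means "no clamp" under the relaxed is_bounded_relu checks. A small sketch of the seeding:

    PixelValue min_val{};
    PixelValue max_val{};
    std::tie(min_val, max_val) = get_min_max(DataType::QASYMM8_SIGNED); // assumed example type
    // min_val.get<int32_t>() == -128, max_val.get<int32_t>() == 127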
index e57dd4e7b15d50e61462bb4aaea23f38ae44efcc..357d77d03ab57c89a4c29afa3763ce4f7a08eed0 100644 (file)
@@ -67,6 +67,23 @@ const auto QuantizationData = framework::dataset::make("QuantizationInfo",
     QuantizationInfo(1.f / 255.f, 10),
     QuantizationInfo(1.1f, 10),
 });
+
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH)
+});
+
+const auto ActivationFunctionsQuantizedDataset = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f)
+});
 } // namespace
 
 TEST_SUITE(CL)
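Editor's note: each extra combine() wraps the previous cartesian product, so every existing configuration now runs once per entry of the activation dataset; TANH appears only in the float dataset because the quantized path fuses ReLU variants only. The shape of the nesting, schematically:

    // (shapes x parameters x data type) x activation
    combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                            FullyConnectedParameters),
                    framework::dataset::make("DataType", DataType::F32)),
            ActivationFunctionsDataset);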
@@ -174,16 +191,18 @@ using CLFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<CLTens
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                                                                                                                         FullyConnectedParameters),
-                                                                                                                framework::dataset::make("DataType", DataType::F16)))
+                                                                                                                        framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
                                                                                                                       FullyConnectedParameters),
-                                                                                                              framework::dataset::make("DataType", DataType::F16)))
+                                                                                                                      framework::dataset::make("DataType", DataType::F16)),
+                                                                                                              ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -191,14 +210,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
-                                                                                                                 framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
+                                                                                                                 framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                 ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
-                                                                                                               framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
+                                                                                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                                                                               ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
@@ -212,13 +233,15 @@ using CLFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuant
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData))
+                       combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData),
+                               ActivationFunctionsQuantizedDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData))
+                       combine(combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData),
+                               ActivationFunctionsQuantizedDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -226,7 +249,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>,
 TEST_SUITE_END() /* QASYMM8 */
 TEST_SUITE(QASYMM8_SIGNED)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData))
+                       combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData),
+                               ActivationFunctionsQuantizedDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
index eb42c4c659186cc90b43f3413c52531abe8ef5a0..94621b4393daf34ed3ec26da87a6274f143056d6 100644 (file)
@@ -150,7 +150,7 @@ TEST_SUITE(QuantizeDownInt32ToUint8Scale)
 
 const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
                                                       3)
-                                                      * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                      * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
                                                            2)
@@ -229,7 +229,7 @@ TEST_SUITE_END() // QuantizeDownInt32ToUint8Scale
 TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint)
 const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                     2)
-                                                                    * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                                    * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                          2)
@@ -310,7 +310,7 @@ TEST_SUITE_END() // BoundedReLu
 TEST_SUITE_END() // QuantizeDownInt32ToUint8ScaleByFixedPoint
 TEST_SUITE(QuantizeDownInt32ToInt8ScaleByFixedPoint)
 const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2)
-                                                                   * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                                   * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 128) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2)
                                                                         * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128, -126) * framework::dataset::make("max", 110, 112) * framework::dataset::make("addBias", { false, true });
@@ -379,7 +379,7 @@ TEST_SUITE(QuantizeDownInt32ToInt16ScaleByFixedPoint)
 
 const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                     2)
-                                                                    * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                                    * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                          2)
@@ -389,7 +389,7 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = f
                                                                                                         1073741825)
                                                                                * framework::dataset::make("result_shift", -3,
                                                                                                           -2)
-                                                                               * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                                               * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600,
                                                                                                              254601602)
@@ -404,26 +404,21 @@ using CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
     framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32),
-                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
                                              TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Wrong output data type
                                           }),
     framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
                                             TensorInfo(TensorShape(21U), 1, DataType::S32),
-                                            TensorInfo(TensorShape(21U), 1, DataType::S32),
                                           })),
     framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16),
-                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16),
                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
                                            })),
     framework::dataset::make("Min",{        -205,
-                                            -60000,
                                             -180,
                                            })),
     framework::dataset::make("Max",{        205,
-                                            60000,
                                             180,
                                            })),
-    framework::dataset::make("Expected", { true, false, false })),
+    framework::dataset::make("Expected", { true, false })),
     a_info, b_info, output_info, min, max, expected)
 {
     // Lock tensors
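Editor's note: these dataset edits track the validation change. "No clamp" used to be encoded as min == max == 0, and out-of-range bounds used to be a validate() failure; both conventions are gone, so the unbounded cases now spell out the full output-type range and the "Invalid min and max" rows are removed. For example:

    // QASYMM8, unbounded:        min = 0,      max = 255
    // QASYMM8_SIGNED, unbounded: min = -128,   max = 128 (>= 127, so no clamp)
    // QSYMM16, unbounded:        min = -32768, max = 32767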
index 4c7ef81572e0b77e2a79f22f214b6f02c16f435e..1ef2fb9559630dccbd888bc276ceff63068857b9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,7 +56,8 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
     DataType::F32,
 });
 
-const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
+const auto FullyConnectedParameters   = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", ActivationLayerInfo());
 } // namespace
 
 TEST_SUITE(GC)
@@ -107,16 +108,18 @@ using GCFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<GCTens
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
-                       framework::dataset::make("DataType", DataType::F16)))
+                       framework::dataset::make("DataType", DataType::F16)),
+                       ActivationFunctionsDataset))
 {
     // Validate output
     validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
                        FullyConnectedParameters),
-                       framework::dataset::make("DataType", DataType::F16)))
+                       framework::dataset::make("DataType", DataType::F16)),
+                       ActivationFunctionsDataset))
 {
     // Validate output
     validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -124,14 +127,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half_float::half>,
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
-                                                                                                                 framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
+                                                                                                                 framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                 ActivationFunctionsDataset))
 {
     // Validate output
     validate(GCAccessor(_target), _reference, rel_tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
-                                                                                                               framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
+                                                                                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                                                                               ActivationFunctionsDataset))
 {
     // Validate output
     validate(GCAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
index fae116aa9ff8745cd422dbb11471d70fc6e00a2d..f66b0ceb8aef8ab4a57b19d3d04ab6f8edecbdb7 100644 (file)
@@ -71,6 +71,8 @@ const auto QuantizationData = framework::dataset::make("QuantizationInfo",
     QuantizationInfo(1.f / 256.f, 10),
     QuantizationInfo(1.1f, 10),
 });
+
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", ActivationLayerInfo());
 } // namespace
 
 TEST_SUITE(NEON)
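Editor's note: unlike the CL suite, the NEON and GLES datasets hold only a default-constructed ActivationLayerInfo(), i.e. activation disabled. The fixture signature changed for every backend, but only CL implements the fused activation in this commit, so the other backends exercise the identity case.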
@@ -179,16 +181,18 @@ using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor
 TEST_SUITE(Float)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                                                                                                                         FullyConnectedParameters),
-                                                                                                                framework::dataset::make("DataType", DataType::F16)))
+                                                                                                                        framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                ActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
                                                                                                                       FullyConnectedParameters),
-                                                                                                              framework::dataset::make("DataType", DataType::F16)))
+                                                                                                                      framework::dataset::make("DataType", DataType::F16)),
+                                                                                                              ActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16);
@@ -197,14 +201,16 @@ TEST_SUITE_END()
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
-                                                                                                                 framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
+                                                                                                                 framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                 ActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
-                                                                                                               framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
+                                                                                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                                                                               ActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
@@ -217,31 +223,34 @@ using NEFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuant
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
                            combine(datasets::SmallFullyConnectedLayerDataset(),
                                    FullyConnectedParameters),
                            framework::dataset::make("DataType", DataType::QASYMM8)),
-                       QuantizationData))
+                       QuantizationData),
+                       ActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(
+FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(
                            combine(datasets::LargeFullyConnectedLayerDataset(),
                                    FullyConnectedParameters),
                            framework::dataset::make("DataType", DataType::QASYMM8)),
-                       QuantizationData))
+                       QuantizationData),
+                       ActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END()
 TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
                            combine(datasets::SmallFullyConnectedLayerDataset(),
                                    FullyConnectedParameters),
                            framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-                       QuantizationData))
+                       QuantizationData),
+                       ActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
index 10f2284914c4a96d444e1c749bdbcb8365a41e42..de30bd5451db814fc4cd570dbcc5e5259c149555 100644 (file)
@@ -169,7 +169,7 @@ TEST_SUITE(QuantizeDownInt32ToUint8Scale)
 
 const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
                                                       3)
-                                                      * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                      * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
                                                            2)
@@ -181,26 +181,21 @@ using NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture = GEMMLowpQuantizeDownInt32
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
     framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
-                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
                                              TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
                                           }),
     framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
-                                            TensorInfo(TensorShape(21U), 1, DataType::S32),
                                             TensorInfo(TensorShape(20U), 1, DataType::S32),
                                           })),
     framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
-                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
                                            })),
     framework::dataset::make("Min",{        0,
-                                            8,
                                             13,
                                            })),
     framework::dataset::make("Max",{        205,
-                                            300,
                                             180,
                                            })),
-    framework::dataset::make("Expected", { true, false, false })),
+    framework::dataset::make("Expected", { true, false })),
     a_info, b_info, output_info, min, max, expected)
 {
     // Lock tensors
@@ -287,7 +282,7 @@ TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint)
 
 const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                     2)
-                                                                    * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                                    * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                          2)
@@ -303,26 +298,21 @@ using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
     framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
-                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
                                              TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
                                           }),
     framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
-                                            TensorInfo(TensorShape(21U), 1, DataType::S32),
                                             TensorInfo(TensorShape(20U), 1, DataType::S32),
                                           })),
     framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
-                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
                                            })),
     framework::dataset::make("Min",{        0,
-                                            8,
                                             13,
                                            })),
     framework::dataset::make("Max",{        205,
-                                            300,
                                             180,
                                            })),
-    framework::dataset::make("Expected", { true, false, false })),
+    framework::dataset::make("Expected", { true, false })),
     a_info, b_info, output_info, min, max, expected)
 {
     // Lock tensors
@@ -414,7 +404,7 @@ TEST_SUITE(QuantizeDownInt32ToInt8ScaleByFixedPoint)
 
 const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                    2)
-                                                                   * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                                   * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 128) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                         2)
@@ -427,31 +417,26 @@ using NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture =
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
         framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::F32), // Invalid input data type
-                                                 TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
                                                  TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
                                                  TensorInfo(TensorShape(21U, 13U), 1, DataType::S32),
         }),
         framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
-                                                TensorInfo(TensorShape(21U), 1, DataType::S32),
                                                 TensorInfo(TensorShape(20U), 1, DataType::S32),
                                                 TensorInfo(TensorShape(21U), 1, DataType::S32),
         })),
         framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
-                                                TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
                                                 TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
                                                 TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
         })),
         framework::dataset::make("Min",{ -110,
-                                         -130,
                                          -113,
                                          -113,
         })),
         framework::dataset::make("Max",{ 87,
-                                         140,
                                          97,
                                          97,
         })),
-        framework::dataset::make("Expected", { false, false, false, true })),
+        framework::dataset::make("Expected", { false, false, true })),
                a_info, b_info, output_info, min, max, expected)
 {
     // Lock tensors
@@ -527,7 +512,7 @@ TEST_SUITE(QuantizeDownInt32ToInt16ScaleByFixedPoint)
 
 const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                     2)
-                                                                    * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                                    * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                          2)
@@ -536,7 +521,7 @@ const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = f
                                                                                                         1073741825)
                                                                                * framework::dataset::make("result_shift", -3,
                                                                                                           -2)
-                                                                               * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+                                                                               * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
 
 const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600,
                                                                                                              254601602)
@@ -551,26 +536,21 @@ using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
     framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
-                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
                                              TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
                                           }),
     framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
-                                            TensorInfo(TensorShape(21U), 1, DataType::S32),
                                             TensorInfo(TensorShape(20U), 1, DataType::S32),
                                           })),
     framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16),
-                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16),
                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
                                            })),
     framework::dataset::make("Min",{        -205,
-                                            -60000,
                                             -180,
                                            })),
     framework::dataset::make("Max",{        205,
-                                            60000,
                                             180,
                                            })),
-    framework::dataset::make("Expected", { true, false, false })),
+    framework::dataset::make("Expected", { true, false })),
     a_info, b_info, output_info, min, max, expected)
 {
     // Lock tensors
index 7f0ceadea104aba2f767e0754edc34ea3c51d1f0..6952b226dab93ef0cdcbf028864ba17e1468eec3 100644 (file)
@@ -34,6 +34,7 @@
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ActivationLayer.h"
 #include "tests/validation/reference/FullyConnectedLayer.h"
 #include "tests/validation/reference/Utils.h"
 
@@ -55,7 +56,7 @@ public:
 public:
     template <typename...>
     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights,
-               DataType data_type, QuantizationInfo quantization_info)
+               DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info)
     {
         ARM_COMPUTE_UNUSED(weights_shape);
         ARM_COMPUTE_UNUSED(bias_shape);
@@ -63,6 +64,7 @@ public:
         _data_type         = data_type;
         _bias_data_type    = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
         _quantization_info = quantization_info;
+        _activation_info   = activation_info;
 
         _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights);
         _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape);
@@ -130,6 +132,7 @@ protected:
         FullyConnectedLayerInfo fc_info;
         fc_info.transpose_weights    = transpose_weights;
         fc_info.are_weights_reshaped = !reshape_weights;
+        fc_info.activation_info      = _activation_info;
 
         // Create and configure function.
         FunctionType fc;
@@ -199,14 +202,15 @@ protected:
         fill(weights, 1);
         fill(bias, 2);
 
-        return reference::fully_connected_layer<T>(src, weights, bias, output_shape);
+        return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape), _activation_info, _quantization_info);
     }
 
-    TensorType       _target{};
-    SimpleTensor<T>  _reference{};
-    DataType         _data_type{};
-    DataType         _bias_data_type{};
-    QuantizationInfo _quantization_info{};
+    TensorType          _target{};
+    SimpleTensor<T>     _reference{};
+    DataType            _data_type{};
+    DataType            _bias_data_type{};
+    QuantizationInfo    _quantization_info{};
+    ActivationLayerInfo _activation_info{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
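Editor's note: with the activation stored on the fixture, the reference path is simply the composition of the two reference operators, which keeps the comparison valid whether or not the backend fuses. A sketch of the composition used in compute_reference above:

    SimpleTensor<T> fc  = reference::fully_connected_layer<T>(src, weights, bias, output_shape);
    SimpleTensor<T> out = reference::activation_layer(fc, _activation_info, _quantization_info);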
@@ -214,11 +218,12 @@ class FullyConnectedLayerValidationFixture : public FullyConnectedLayerValidatio
 {
 public:
     template <typename...>
-    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type)
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type,
+               ActivationLayerInfo activation_info)
     {
         FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights,
                                                                                                       reshape_weights, data_type,
-                                                                                                      QuantizationInfo());
+                                                                                                      QuantizationInfo(), activation_info);
     }
 };
 
@@ -228,11 +233,11 @@ class FullyConnectedLayerValidationQuantizedFixture : public FullyConnectedLayer
 public:
     template <typename...>
     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type,
-               QuantizationInfo quantization_info)
+               QuantizationInfo quantization_info, ActivationLayerInfo activation_info)
     {
         FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights,
                                                                                                       reshape_weights, data_type,
-                                                                                                      quantization_info);
+                                                                                                      quantization_info, activation_info);
     }
 };
 } // namespace validation