COMPMID-3280: Make all ML primitives for CL use the new interface - Part 2
author: Manuel Bottini <manuel.bottini@arm.com>
Wed, 8 Apr 2020 09:15:51 +0000 (10:15 +0100)
committer: Manuel Bottini <manuel.bottini@arm.com>
Thu, 23 Apr 2020 17:53:59 +0000 (17:53 +0000)
- CLFunctions have been updated

Change-Id: Ie3256a6c775bc12f3126482bd8e8a46da54b267c
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3053
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

253 files changed:
arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
arm_compute/runtime/CL/functions/CLAccumulate.h
arm_compute/runtime/CL/functions/CLActivationLayer.h
arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
arm_compute/runtime/CL/functions/CLBitwiseAnd.h
arm_compute/runtime/CL/functions/CLBitwiseNot.h
arm_compute/runtime/CL/functions/CLBitwiseOr.h
arm_compute/runtime/CL/functions/CLBitwiseXor.h
arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
arm_compute/runtime/CL/functions/CLBox3x3.h
arm_compute/runtime/CL/functions/CLCannyEdge.h
arm_compute/runtime/CL/functions/CLCast.h
arm_compute/runtime/CL/functions/CLChannelCombine.h
arm_compute/runtime/CL/functions/CLChannelExtract.h
arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
arm_compute/runtime/CL/functions/CLColorConvert.h
arm_compute/runtime/CL/functions/CLComparison.h
arm_compute/runtime/CL/functions/CLComputeAllAnchors.h
arm_compute/runtime/CL/functions/CLConcatenateLayer.h
arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
arm_compute/runtime/CL/functions/CLConvolution.h
arm_compute/runtime/CL/functions/CLConvolutionLayer.h
arm_compute/runtime/CL/functions/CLCopy.h
arm_compute/runtime/CL/functions/CLCropResize.h
arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
arm_compute/runtime/CL/functions/CLDequantizationLayer.h
arm_compute/runtime/CL/functions/CLDerivative.h
arm_compute/runtime/CL/functions/CLDilate.h
arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
arm_compute/runtime/CL/functions/CLElementwiseOperations.h
arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
arm_compute/runtime/CL/functions/CLErode.h
arm_compute/runtime/CL/functions/CLFFT1D.h
arm_compute/runtime/CL/functions/CLFFT2D.h
arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
arm_compute/runtime/CL/functions/CLFastCorners.h
arm_compute/runtime/CL/functions/CLFill.h
arm_compute/runtime/CL/functions/CLFillBorder.h
arm_compute/runtime/CL/functions/CLFlattenLayer.h
arm_compute/runtime/CL/functions/CLFloor.h
arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
arm_compute/runtime/CL/functions/CLGEMM.h
arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
arm_compute/runtime/CL/functions/CLGather.h
arm_compute/runtime/CL/functions/CLGaussian3x3.h
arm_compute/runtime/CL/functions/CLGaussian5x5.h
arm_compute/runtime/CL/functions/CLGaussianPyramid.h
arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
arm_compute/runtime/CL/functions/CLHOGDescriptor.h
arm_compute/runtime/CL/functions/CLHOGDetector.h
arm_compute/runtime/CL/functions/CLHOGGradient.h
arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
arm_compute/runtime/CL/functions/CLHarrisCorners.h
arm_compute/runtime/CL/functions/CLHistogram.h
arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
arm_compute/runtime/CL/functions/CLIntegralImage.h
arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
arm_compute/runtime/CL/functions/CLLSTMLayer.h
arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h
arm_compute/runtime/CL/functions/CLLaplacianPyramid.h
arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h
arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
arm_compute/runtime/CL/functions/CLMagnitude.h
arm_compute/runtime/CL/functions/CLMeanStdDev.h
arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
arm_compute/runtime/CL/functions/CLMedian3x3.h
arm_compute/runtime/CL/functions/CLMinMaxLocation.h
arm_compute/runtime/CL/functions/CLNonLinearFilter.h
arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
arm_compute/runtime/CL/functions/CLNormalizationLayer.h
arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
arm_compute/runtime/CL/functions/CLOpticalFlow.h
arm_compute/runtime/CL/functions/CLPReluLayer.h
arm_compute/runtime/CL/functions/CLPadLayer.h
arm_compute/runtime/CL/functions/CLPermute.h
arm_compute/runtime/CL/functions/CLPhase.h
arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
arm_compute/runtime/CL/functions/CLPoolingLayer.h
arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
arm_compute/runtime/CL/functions/CLQLSTMLayer.h
arm_compute/runtime/CL/functions/CLQuantizationLayer.h
arm_compute/runtime/CL/functions/CLRNNLayer.h
arm_compute/runtime/CL/functions/CLROIAlignLayer.h
arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
arm_compute/runtime/CL/functions/CLRange.h
arm_compute/runtime/CL/functions/CLReduceMean.h
arm_compute/runtime/CL/functions/CLReductionOperation.h
arm_compute/runtime/CL/functions/CLRemap.h
arm_compute/runtime/CL/functions/CLReorgLayer.h
arm_compute/runtime/CL/functions/CLReshapeLayer.h
arm_compute/runtime/CL/functions/CLReverse.h
arm_compute/runtime/CL/functions/CLScale.h
arm_compute/runtime/CL/functions/CLScharr3x3.h
arm_compute/runtime/CL/functions/CLSelect.h
arm_compute/runtime/CL/functions/CLSlice.h
arm_compute/runtime/CL/functions/CLSobel3x3.h
arm_compute/runtime/CL/functions/CLSobel5x5.h
arm_compute/runtime/CL/functions/CLSobel7x7.h
arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
arm_compute/runtime/CL/functions/CLStackLayer.h
arm_compute/runtime/CL/functions/CLStridedSlice.h
arm_compute/runtime/CL/functions/CLTableLookup.h
arm_compute/runtime/CL/functions/CLThreshold.h
arm_compute/runtime/CL/functions/CLTile.h
arm_compute/runtime/CL/functions/CLTranspose.h
arm_compute/runtime/CL/functions/CLUnstack.h
arm_compute/runtime/CL/functions/CLUpsampleLayer.h
arm_compute/runtime/CL/functions/CLWarpAffine.h
arm_compute/runtime/CL/functions/CLWarpPerspective.h
arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
arm_compute/runtime/CL/functions/CLYOLOLayer.h
src/runtime/CL/functions/CLAbsoluteDifference.cpp
src/runtime/CL/functions/CLAccumulate.cpp
src/runtime/CL/functions/CLActivationLayer.cpp
src/runtime/CL/functions/CLArgMinMaxLayer.cpp
src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
src/runtime/CL/functions/CLBatchToSpaceLayer.cpp
src/runtime/CL/functions/CLBitwiseAnd.cpp
src/runtime/CL/functions/CLBitwiseNot.cpp
src/runtime/CL/functions/CLBitwiseOr.cpp
src/runtime/CL/functions/CLBitwiseXor.cpp
src/runtime/CL/functions/CLBoundingBoxTransform.cpp
src/runtime/CL/functions/CLBox3x3.cpp
src/runtime/CL/functions/CLCannyEdge.cpp
src/runtime/CL/functions/CLCast.cpp
src/runtime/CL/functions/CLChannelCombine.cpp
src/runtime/CL/functions/CLChannelExtract.cpp
src/runtime/CL/functions/CLChannelShuffleLayer.cpp
src/runtime/CL/functions/CLColorConvert.cpp
src/runtime/CL/functions/CLComparison.cpp
src/runtime/CL/functions/CLComputeAllAnchors.cpp
src/runtime/CL/functions/CLConcatenateLayer.cpp
src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
src/runtime/CL/functions/CLConvolution.cpp
src/runtime/CL/functions/CLConvolutionLayer.cpp
src/runtime/CL/functions/CLCopy.cpp
src/runtime/CL/functions/CLCropResize.cpp
src/runtime/CL/functions/CLDeconvolutionLayer.cpp
src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
src/runtime/CL/functions/CLDepthConvertLayer.cpp
src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
src/runtime/CL/functions/CLDequantizationLayer.cpp
src/runtime/CL/functions/CLDerivative.cpp
src/runtime/CL/functions/CLDilate.cpp
src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp
src/runtime/CL/functions/CLElementwiseOperations.cpp
src/runtime/CL/functions/CLEqualizeHistogram.cpp
src/runtime/CL/functions/CLErode.cpp
src/runtime/CL/functions/CLFFT1D.cpp
src/runtime/CL/functions/CLFFT2D.cpp
src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
src/runtime/CL/functions/CLFastCorners.cpp
src/runtime/CL/functions/CLFill.cpp
src/runtime/CL/functions/CLFillBorder.cpp
src/runtime/CL/functions/CLFlattenLayer.cpp
src/runtime/CL/functions/CLFloor.cpp
src/runtime/CL/functions/CLFullyConnectedLayer.cpp
src/runtime/CL/functions/CLFuseBatchNormalization.cpp
src/runtime/CL/functions/CLGEMM.cpp
src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp
src/runtime/CL/functions/CLGather.cpp
src/runtime/CL/functions/CLGaussian3x3.cpp
src/runtime/CL/functions/CLGaussian5x5.cpp
src/runtime/CL/functions/CLGaussianPyramid.cpp
src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
src/runtime/CL/functions/CLHOGDescriptor.cpp
src/runtime/CL/functions/CLHOGDetector.cpp
src/runtime/CL/functions/CLHOGGradient.cpp
src/runtime/CL/functions/CLHOGMultiDetection.cpp
src/runtime/CL/functions/CLHarrisCorners.cpp
src/runtime/CL/functions/CLHistogram.cpp
src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
src/runtime/CL/functions/CLIntegralImage.cpp
src/runtime/CL/functions/CLL2NormalizeLayer.cpp
src/runtime/CL/functions/CLLSTMLayer.cpp
src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
src/runtime/CL/functions/CLLaplacianPyramid.cpp
src/runtime/CL/functions/CLLaplacianReconstruct.cpp
src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
src/runtime/CL/functions/CLMagnitude.cpp
src/runtime/CL/functions/CLMeanStdDev.cpp
src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp
src/runtime/CL/functions/CLMedian3x3.cpp
src/runtime/CL/functions/CLMinMaxLocation.cpp
src/runtime/CL/functions/CLNonLinearFilter.cpp
src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp
src/runtime/CL/functions/CLNormalizationLayer.cpp
src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
src/runtime/CL/functions/CLOpticalFlow.cpp
src/runtime/CL/functions/CLPReluLayer.cpp
src/runtime/CL/functions/CLPadLayer.cpp
src/runtime/CL/functions/CLPermute.cpp
src/runtime/CL/functions/CLPhase.cpp
src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
src/runtime/CL/functions/CLPoolingLayer.cpp
src/runtime/CL/functions/CLPriorBoxLayer.cpp
src/runtime/CL/functions/CLQLSTMLayer.cpp
src/runtime/CL/functions/CLQuantizationLayer.cpp
src/runtime/CL/functions/CLRNNLayer.cpp
src/runtime/CL/functions/CLROIAlignLayer.cpp
src/runtime/CL/functions/CLROIPoolingLayer.cpp
src/runtime/CL/functions/CLRange.cpp
src/runtime/CL/functions/CLReduceMean.cpp
src/runtime/CL/functions/CLReductionOperation.cpp
src/runtime/CL/functions/CLRemap.cpp
src/runtime/CL/functions/CLReorgLayer.cpp
src/runtime/CL/functions/CLReshapeLayer.cpp
src/runtime/CL/functions/CLReverse.cpp
src/runtime/CL/functions/CLScale.cpp
src/runtime/CL/functions/CLScharr3x3.cpp
src/runtime/CL/functions/CLSelect.cpp
src/runtime/CL/functions/CLSlice.cpp
src/runtime/CL/functions/CLSobel3x3.cpp
src/runtime/CL/functions/CLSobel5x5.cpp
src/runtime/CL/functions/CLSobel7x7.cpp
src/runtime/CL/functions/CLSoftmaxLayer.cpp
src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
src/runtime/CL/functions/CLSpaceToDepthLayer.cpp
src/runtime/CL/functions/CLStackLayer.cpp
src/runtime/CL/functions/CLStridedSlice.cpp
src/runtime/CL/functions/CLTableLookup.cpp
src/runtime/CL/functions/CLThreshold.cpp
src/runtime/CL/functions/CLTile.cpp
src/runtime/CL/functions/CLTranspose.cpp
src/runtime/CL/functions/CLUnstack.cpp
src/runtime/CL/functions/CLUpsampleLayer.cpp
src/runtime/CL/functions/CLWarpAffine.cpp
src/runtime/CL/functions/CLWarpPerspective.cpp
src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
src/runtime/CL/functions/CLWinogradInputTransform.cpp
src/runtime/CL/functions/CLYOLOLayer.cpp
tests/validation/fixtures/CropResizeFixture.h

index 28d3acc043624acea01b8d59488124f02717d668..26aded6def95a60b6c6ca88812976f1b34c2a11b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,14 @@ public:
      * @param[out] output Output tensor. Data types supported: U8, S16
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First input tensor. Data types supported: U8, S16
+     * @param[in]  input2          Second input tensor. Data types supported: U8, S16
+     * @param[out] output          Output tensor. Data types supported: U8, S16
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H */
index f465ab3c46709b94db720f3de4eea7bccdfbaac1..b47f0c0e4a72f081a3a40e9039cfacf9eb2bd891 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,13 @@ public:
      * @param[out] accum Destination tensor. Data types supported: S16.
      */
     void configure(const ICLTensor *input, ICLTensor *accum);
+    /** Set the input and accumulation tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] accum           Destination tensor. Data types supported: S16.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum);
 };
 
 /** Basic function to run @ref CLAccumulateWeightedKernel */
@@ -55,6 +62,14 @@ public:
      * @param[in,out] accum Accumulated tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
+    /** Set the input and accumulation tensors, and the scale value.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Source tensor. Data types supported: U8.
+     * @param[in]     alpha           The input scalar value with a value input the range of [0, 1.0]. Data types supported: F32.
+     * @param[in,out] accum           Accumulated tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum);
 };
 
 /** Basic function to run @ref CLAccumulateSquaredKernel */
@@ -68,6 +83,14 @@ public:
      * @param[in,out] accum Accumulated tensor. Data types supported: S16.
      */
     void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
+    /** Set the input and accumulation tensors and the shift value.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Source tensor. Data types supported: U8.
+     * @param[in]     shift           The input with a value input the range of [0, 15]. Data types supported: U32.
+     * @param[in,out] accum           Accumulated tensor. Data types supported: S16.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum);
 };
 }
 #endif /*ARM_COMPUTE_CLACCUMULATE_H */
index 09f5d2bf58dbc41df3efec62b3d2b27b52d38b57..fbb34e5fb9195c136dfb796b36f181cb29f55219 100644 (file)
@@ -62,6 +62,17 @@ public:
      * @param[in]      act_info Activation layer parameters.
      */
     void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+     *                                 of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+     * @param[out]     output          Destination tensor. Data type supported: same as @p input
+     * @param[in]      act_info        Activation layer parameters.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayer
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
index a26fcfda5666a9c24ab8a7e5c4936e2f6fa3335b..b0d29bcefeb514badd8e67a4a72c0afef0e1bdf2 100644 (file)
@@ -61,6 +61,15 @@ public:
      * @param[in]  op     Reduction operation to perform. Operations supported: ARG_IDX_MAX, ARG_IDX_MIN
      */
     void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input source tensor. Data types supported: QASYMM8/F16/F32.
+     * @param[in]  axis            Axis to find max/min index.
+     * @param[out] output          Output source tensor. Data types supported: U32/S32.
+     * @param[in]  op              Reduction operation to perform. Operations supported: ARG_IDX_MAX, ARG_IDX_MIN
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
     /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayer
      *
      * @param[in] input  Input source tensor info. Data types supported: QASYMM8/F16/F32.
index 7cd4d164d8f6516ce7c2740af39817b6eb4aa4ed..a211ea6b04331ca5144d41c97372e8023ec69ed7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,6 +61,25 @@ public:
      */
     void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f,
                    ActivationLayerInfo act_info = ActivationLayerInfo());
+    /** Set the input and output tensors.
+     *
+     * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
+     *                                 3 lower dimensions represent a single input with dimensions [width, height, FM].
+     *                                 The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+     * @param[out]     output          Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+     * @param[in]      mean            Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      var             Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      beta            (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
+     * @param[in]      gamma           (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
+     * @param[in]      epsilon         (Optional) Small value to avoid division with zero. Default value is 0.001f.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr,
+                   const ICLTensor *gamma = nullptr,
+                   float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayer
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
index b98702819b234388c6da4be9fa37e9332f781441..6edb4641fec76dacd6ad5d3eb929073a38c17eff 100644 (file)
@@ -46,6 +46,14 @@ public:
      * @param[out] output      Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape     1-D tensor with shape [M]. Data types supported: S32
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
     /** Set the input and output tensors. (Static block shape).
      *
      * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -54,6 +62,15 @@ public:
      * @param[out] output        Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output);
+    /** Set the input and output tensors. (Static block shape).
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape_x   Block shape x value.
+     * @param[in]  block_shape_y   Block shape y value.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer
      *
      * @param[in]  input       Tensor input info. Supported tensor rank: 4. Data types supported: All.
index 77907cc08b5dc032770f0d77a89777a32d8e3139..1faded04fecaddd89b180deb6fc774550235fbfa 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,14 @@ public:
      * @param[out] output Output tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Input tensor. Data types supported: U8.
+     * @param[in]  input2          Input tensor. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLBITWISEAND_H */
index b5c7cfe5fc0f04a7e859c54d11948ce1aedbcbcf..c9460555dde129477a5cde065912913be5401d0b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,13 @@ public:
      * @param[out] output Output tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLBITWISENOT_H */
index 5957c3f6a820d9eaf4a35a201f966dfaf7e835b2..4fb93cc8a26d6bb1e0ded7899fc03be917a360f6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,14 @@ public:
      * @param[out] output Output tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Input tensor. Data types supported: U8.
+     * @param[in]  input2          Input tensor. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLBITWISEOR_H */
index a4e864c0aa1f24d9fd01b421e21cffc590353006..6caa0136072eafdcccc0f0c5506da3a7a7b87fa8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,14 @@ public:
      * @param[out] output Output tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Input tensor. Data types supported: U8.
+     * @param[in]  input2          Input tensor. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLBITWISEXOR_H */
index 3e117818272502694f5c00d3b9b7124748d543d7..b09359dfc2dc3443b4c9616d6d4f9e8c843144d9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,18 @@ public:
      * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
      */
     void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  boxes           Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
+     * @param[out] pred_boxes      Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input
+     * @param[in]  deltas          Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K  is the number of classes.
+     *                             Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input
+     * @param[in]  info            Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
+     *
+     * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform
      *
index 3fb18e3270557e6b86f4edde2cce709c0460c21a..a4cf4d296bdf4bcf5371131627044918e1f75634 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor, Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLBOX3X3_H */
index 1a5676795f409d6cebcecb0dbf9268984080e715..2729d241a91f88c9c1fbdb365eb7f96031a46096 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -69,6 +69,20 @@ public:
      */
     void configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
                    uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     upper_thr             Upper threshold used for the hysteresis.
+     * @param[in]     lower_thr             Lower threshold used for the hysteresis.
+     * @param[in]     gradient_size         Gradient size (3, 5 or 7).
+     * @param[in]     norm_type             Normalization type. If 1, L1-Norm otherwise L2-Norm.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     virtual void run() override;
index 4cb1fe0bb50ca47cea4f52eb4a763f8d68765020..6a1835c73a538efadb5109f2590e544201b1fd05 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,26 @@ public:
      * @param[in]  policy Conversion policy.
      */
     void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy);
+    /** Initialize the function's source, destination
+     *
+     * Input data type must be different than output data type.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - U8  -> S8, U16, S16, U32, S32, F16, F32
+     *   - U16 -> U8, S8, S16, U32, S32, F16, F32
+     *   - S16 -> U8, S8, U16, U32, S32, F16, F32
+     *   - U32 -> U8, S8, U16, S16, S32, F16, F32
+     *   - S32 -> U8, S8, U16, S16, U32, F16, F32
+     *   - F16 -> U8, S8, U16, S16, U32, F32
+     *   - F32 -> U8, S8, U16, S16, U32, F16
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[out] output          The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy          Conversion policy.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy);
     /** Static function to check if given info will lead to a valid configuration of @ref CLCast
      *
      * @param[in] input  Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
index 25f31d86d18626a1f8847b5da824a94e2bfa2c3f..474830d7af9e308f54889874dd88292031c32485 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,16 @@ public:
      * @param[out] output The single planar output tensor.
      */
     void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
+    /** Initialize function's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  plane0          The 2D plane that forms channel 0. Must be of U8 format.
+     * @param[in]  plane1          The 2D plane that forms channel 1. Must be of U8 format.
+     * @param[in]  plane2          The 2D plane that forms channel 2. Must be of U8 format.
+     * @param[in]  plane3          The 2D plane that forms channel 3. Must be of U8 format.
+     * @param[out] output          The single planar output tensor.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
     /** Initialize function's inputs and outputs.
      *
      * @param[in]  plane0 The 2D plane that forms channel 0. Must be of U8 format.
@@ -53,6 +63,15 @@ public:
      * @param[out] output The multi planar output image.
      */
     void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
+    /** Initialize function's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  plane0          The 2D plane that forms channel 0. Must be of U8 format.
+     * @param[in]  plane1          The 2D plane that forms channel 1. Must be of U8 format.
+     * @param[in]  plane2          The 2D plane that forms channel 2. Must be of U8 format.
+     * @param[out] output          The multi planar output image.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
 };
 }
 #endif /*ARM_COMPUTE_CLCHANNELCOMBINE_H*/
index 77d84b968cc014c8ca89fd3d05c0050e9f782890..aa25516e186bad071182d99c00a396f7a41c2eae 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,14 @@ public:
      * @param[out] output  The extracted channel. Must be of U8 format.
      */
     void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
+    /** Initialize the function's source, destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to extract the channel from. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
+     * @param[in]  channel         The channel to extract.
+     * @param[out] output          The extracted channel. Must be of U8 format.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output);
     /** Initialize the function's source, destination
      *
      * @param[in]  input   The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444
@@ -51,6 +59,14 @@ public:
      * @param[out] output  The extracted 2D channel. Must be of U8 format.
      */
     void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
+    /** Initialize the function's source, destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444
+     * @param[in]  channel         The channel to extract.
+     * @param[out] output          The extracted 2D channel. Must be of U8 format.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output);
 };
 }
 #endif /*ARM_COMPUTE_CLCHANNELEXTRACT_H*/
index 6e30bd3ebd1d8f42fe6750578afaded2e8e33869..183a2f1ea6c7537c34b874a26446659a51ba7b5c 100644 (file)
@@ -46,6 +46,14 @@ public:
      * @param[in]  num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
      */
     void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     * @param[in]  num_groups      Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
     /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
      *
      * @param[in] input      Input tensor info. Data types supported: All.
index 1a3bea9cd32387544507d24ff4aef4e81bda2e88..8721e8afa1f9312b0c2781e6368dc9adb0a846a0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,24 +47,54 @@ public:
      *                                                          U8 (if the formats of @p input is RGB888)
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function's source, destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
+     * @param[out] output          Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
+     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+     *                                                          U8 (if the formats of @p input is RGB888)
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Initialize the function's source, destination
      *
      * @param[in]  input  Multi-planar source image. Formats supported: NV12/NV21/IYUV
      * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
      */
     void configure(const ICLMultiImage *input, ICLImage *output);
+    /** Initialize the function's source, destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Multi-planar source image. Formats supported: NV12/NV21/IYUV
+     * @param[out] output          Single-planar destination image. Formats supported: RGB888/RGBA8888
+     */
+    void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output);
     /** Initialize the function's source, destination
      *
      * @param[in]  input  Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
      * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
      */
     void configure(const ICLImage *input, ICLMultiImage *output);
+    /** Initialize the function's source, destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+     * @param[out] output          Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output);
     /** Initialize the function's source, destination
      *
      * @param[in]  input  Multi-planar source image. Formats supported: NV12/NV21/IYUV
      * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of  @p input is IYUV)
      */
     void configure(const ICLMultiImage *input, ICLMultiImage *output);
+    /** Initialize the function's source, destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Multi-planar source image. Formats supported: NV12/NV21/IYUV
+     * @param[out] output          Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of  @p input is IYUV)
+     */
+    void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output);
 };
 }
 #endif /* ARM_COMPUTE_CLCOLORCONVERT_H */
index 85dbe7129d0eb7d4f507c642c0185b2802847116..4e681e73a74986f8e58a6c8dbc3c7978529ccf8c 100644 (file)
@@ -46,6 +46,17 @@ public:
      * @param[out] operation Comparison operation to be used.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Source tensor. Data types supported: All.
+     *                             The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in]  input2          Source tensor. Data types supported: Same as @p input1.
+     *                             The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out] output          Destination tensor. Data types supported: U8.
+     * @param[in]  operation       Comparison operation to be used.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
     /** Static function to check if given info will lead to a valid configuration of @ref CLComparison
      *
      * @param[in]  input1    Source tensor. Data types supported: All.
@@ -75,6 +86,19 @@ public:
      * @param[out] output Destination tensor. Data types supported: U8.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
+    /** Comparison operations used by the class */
+
+public:
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     *                             The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in]  input2          Source tensor. Data types supported: Same as @p input1.
+     *                             The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out] output          Destination tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLComparison
      *
      * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
index a039320c4e1fb183222b7be543518d1ca76ed428..15c5bfeb7d74946f63973b8567802aa1c9f4e7b6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,6 +47,15 @@ public:
      *
      */
     void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  anchors         Source tensor. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32
+     * @param[out] all_anchors     Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
+     * @param[in]  info            Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+     *
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel
      *
index c3d065a2bab9b8fc766096f59b85c6e5594debb8..b8e3361e9eb8040f4235c2031cabfa6427f1866a 100644 (file)
@@ -62,6 +62,18 @@ public:
      */
     void configure(std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
     void configure(std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
+     * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] inputs_vector   The vectors containing all the tensors to concatenate. Data types supported: All.
+     * @param[out]    output          Output tensor. Data types supported: Same as @p inputs_vector.
+     * @param[in]     axis            Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
+     */
+    void configure(const CLCompileContext &compile_context, std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
+    void configure(const CLCompileContext &compile_context, std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
     /** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer
      *
      * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
@@ -81,7 +93,7 @@ public:
 
 private:
     template <typename TensorType>
-    void configure_internal(std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis);
+    void configure_internal(const CLCompileContext &compile_context, std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis);
 
     template <typename TensorInfoType>
     static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis);
index 76a28ed6fee6e264201af459e639e8e98c951c07..123f6380bbf6e803286e12d3b8564952863a89af 100644 (file)
@@ -47,6 +47,17 @@ public:
      * @return A status
      */
     void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+    /** Initialize the function.
+     *
+     * @param[in]  compile_context      The compile context to be used.
+     * @param[in]  input                Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
+     * @param[out] output               The converted weights tensor. Shape and Data Type: Same as @p input.
+     * @param[in]  original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+     * @param[in]  data_layout          The data layout the weights have been trained in.
+     *
+     * @return A status
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
     /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeights
      *
      * @param[in] input                Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
@@ -96,7 +107,18 @@ public:
      */
     void configure(const ICLTensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
     {
-        _func.configure(input, &_output, original_input_shape, data_layout);
+        configure(CLKernelLibrary::get().get_compile_context(), input, original_input_shape, data_layout);
+    }
+    /** Configures the @ref CLConvertFullyConnectedWeights function
+     *
+     * @param[in] compile_context      The compile context to be used.
+     * @param[in] input                Source weights tensor to convert. Data type supported: All.
+     * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+     * @param[in] data_layout          The data layout the weights have been trained in.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
+    {
+        _func.configure(compile_context, input, &_output, original_input_shape, data_layout);
     }
 
 private:
index 43507d7cbc7463f48aaf85e3bd148ea921628f1e..72ef8ce7b8f003cf3b5653f66982532033f77dc5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,17 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8 or S16.
+     * @param[in]     conv                  matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 
 /** Basic function to execute square convolution.Currently it supports 5x5, 7x7, 9x9. This function calls the following OpenCL kernels:
@@ -84,6 +95,17 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8 or S16.
+     * @param[in]     conv                  matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overriden:
     void run() override;
@@ -127,6 +149,20 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8 or S16.
+     * @param[in]     conv                  Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     rows                  Rows of convolution kernel.
+     * @param[in]     cols                  Columns of convolution kernel.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLCONVOLUTION_H */
index b52695463acd33ed462789adc3151bf1c5994131..fff917321077d4b0775da9283918252a1dabd75a 100644 (file)
@@ -94,6 +94,29 @@ public:
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
                    const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                              while every optional dimension from 4 and above represent a batch of inputs.
+     *                              Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  weights          Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                              Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]  biases           Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                              Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+     * @param[out] output           Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                              Data types supported: Same as @p input.
+     * @param[in]  conv_info        Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  weights_info     Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input.
+     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
+     * @param[in]  enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+     *                              available which may introduce a drop of accuracy as well. Default is false
+     * @param[in]  num_groups       (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
+                   unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer
      *
      * @param[in] input            Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
index 9252ac3c5711c2e76200398718735fa2bac0351f..31b73c33c3d987aa6df26b1316be2cbd0b0571ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,14 @@ public:
      *
      */
     void configure(ICLTensor *input, ICLTensor *output);
+    /** Initialise the function's source and destination.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[out] output          Output tensor. Data types supported: Same as @p input.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLCopy
      *
      * @param[in] input  Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
index 244e345b0347dfffc96b54a6fd9c024cd2f5f26f..86df0d46d1de25a22cc415f14577060d083637ad 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,10 +62,10 @@ public:
      * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
      * @note Start and end indices of boxes are inclusive.
      *
-     * @param[in]  input               Source tensor containing N batches of 3D images to be cropped. Data type supported: F32
-     * @param[in]  boxes               Tensor containing the boxes used to crop the images. Data type supported: F32
+     * @param[in]  input               Source tensor containing N batches of 3D images to be cropped. Data type supported: : U16/S16/U32/S32/F16/F32
+     * @param[in]  boxes               Tensor containing the boxes used to crop the images. It has to be known before configuration. Data type supported: F32
      * @param[in]  box_ind             One dimensional tensor containing the batch index of the 3D image in @p input that the corresponding
-     *                                 box in @p boxes will be applied to. Data type supported: F32
+     *                                 box in @p boxes will be applied to. It has to be known before configuration. Data type supported: F32
      * @param[out] output              Destination tensor containing a cropped and resized image for each box in @p boxes. Data type supported: F32
      * @param[in]  crop_size           The dimensions that each cropped image will be resized to.
      * @param[in]  method              The policy to be used when resizing image. Default is bilinear.
@@ -73,6 +73,24 @@ public:
      */
     void configure(const ICLTensor *input, ICLTensor *boxes, ICLTensor *box_ind, ICLTensor *output, Coordinates2D crop_size,
                    InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
+     * @note Start and end indices of boxes are inclusive.
+     *
+     * @param[in]  compile_context     The compile context to be used.
+     * @param[in]  input               Source tensor containing N batches of 3D images to be cropped. Data type supported: U16/S16/U32/S32/F16/F32
+     * @param[in]  boxes               Tensor containing the boxes used to crop the images. It has to be known before configuration. Data type supported: F32
+     * @param[in]  box_ind             One dimensional tensor containing the batch index of the 3D image in @p input that the corresponding
+     *                                 box in @p boxes will be applied to. It has to be known before configuration. Data type supported: F32
+     * @param[out] output              Destination tensor containing a cropped and resized image for each box in @p boxes. Data type supported: F32
+     * @param[in]  crop_size           The dimensions that each cropped image will be resized to.
+     * @param[in]  method              The policy to be used when resizing image. Default is bilinear.
+     * @param[in]  extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *boxes, ICLTensor *box_ind, ICLTensor *output, Coordinates2D crop_size,
+                   InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NESlice
      *
@@ -109,6 +127,8 @@ public:
     std::vector<std::unique_ptr<CLCopyKernel>> _copy;
     std::vector<std::unique_ptr<CLTensor>>     _crop_results;
     std::vector<std::unique_ptr<CLTensor>>     _scaled_results;
+
+    std::vector<std::unique_ptr<ICLKernel>> _internal_kernels;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CL_CROP_RESIZE_H */
index 78c149d933588c16dfb19d44bd1f5a654df14c69..c75b58613225da31d379fbf654904e0853a8c98d 100644 (file)
@@ -55,6 +55,19 @@ public:
      *
      */
     void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info, const WeightsInfo &weights_info = WeightsInfo());
+    /** Set the input, weights, biases and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+     * @param[in]     weights         The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]     bias            (Optional) The biases have one dimension. Data type supported: Same as @p input.
+     * @param[out]    output          Output tensor. The output has the same number of dimensions as the @p input.
+     * @param[in]     deconv_info     Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
+     * @param[in]     weights_info    (Optional) Weights information needed for @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
+                   const WeightsInfo &weights_info = WeightsInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer
      *
      * @param[in] input        Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
index 5a1009c79f6bc85dcad2a28347eb2ba10c952cb2..2d3dde1ea03c67c76ebc22727eee5f42012c2cd5 100644 (file)
@@ -64,6 +64,14 @@ public:
      * @param[in]      info   Contains padding and policies to be used in the deconvolution.
      */
     void configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
+    /** Initialize the function's source, destination, interpolation type and border_mode.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. Data type supported: All.
+     * @param[out]     output          Destination tensor. Data type supported: same as @p input.
+     * @param[in]      info            Contains padding and policies to be used in the deconvolution.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
      *
      * @param[in] input  Source tensor info. Data type supported: All.
index 1b9476c3a5311f9113a39894dc07f2c9d2ca5962..910b9eac517edef861a671cce7fc953acdbd495f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,27 @@ public:
      * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
      */
     void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+    /** Initialize the function's source, destination
+     *
+     * Input data type must be different than output data type.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - U8  -> S8, U16, S16, U32, S32, F16, F32
+     *   - U16 -> U8, S8, S16, U32, S32, F16, F32
+     *   - S16 -> U8, S8, U16, U32, S32, F16, F32
+     *   - U32 -> U8, S8, U16, S16, S32, F16, F32
+     *   - S32 -> U8, S8, U16, S16, U32, F16, F32
+     *   - F16 -> U8, S8, U16, S16, U32, F32
+     *   - F32 -> U8, S8, U16, S16, U32, F16
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[out] output          The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy          Conversion policy.
+     * @param[in]  shift           Value for down/up conversions. Must be 0 <= shift < 8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayer
      *
      * @param[in] input  Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
index 0c33ed34beb9ff4498f11dde9f87fc464d9e3453..dbf5898319c97ab51e766bf2ef5ff41137f3324d 100644 (file)
@@ -42,6 +42,14 @@ public:
      * @param[in]  block_shape Block shape value.
      */
     void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     * @param[in]  block_shape     Block shape value.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayer.
      *
      * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: All.
index 4668e82bab402b3a6fcb8dd0d63797fc1048ec14..63c359e68caa899b45749d5598c8cb6fbd86757c 100644 (file)
@@ -70,6 +70,22 @@ public:
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                    ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+    /** Initialize the function's source, destination, weights and convolution information.
+     *
+     * @param[in]      compile_context  The compile context to be used.
+     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. Data layout supported: NHWC, NCHW
+     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
+     *                                  Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8.
+     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
+     * @param[in]      conv_info        Padding and stride information to use for the convolution.
+     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
+     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
      *
@@ -150,6 +166,22 @@ private:
          */
         void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                        ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+        /** Initialize the function's source, destination, conv and border_size.
+         *
+         * @param[in]      compile_context  The compile context to be used.
+         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+         * @param[in]      weights          Weights tensor. A 3D tensor with shape [3, 3, IFM].
+         *                                  Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+         *                                  Data type supported: Same as @p input.
+         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
+         * @param[in]      conv_info        Padding and stride information to use for the convolution.
+         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+         */
+        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                       unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
         /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
          *
@@ -234,6 +266,22 @@ private:
          */
         void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                        unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+        /** Initialize the function's source, destination, weights and convolution information.
+         *
+         * @param[in]      compile_context  The compile context to be used.
+         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
+         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
+         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
+         * @param[in]      conv_info        Padding and stride information to use for the convolution.
+         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
+         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+         */
+        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
         /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
          *
@@ -328,6 +376,23 @@ public:
     ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer)
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                    ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in]      compile_context  The compile context to be used.
+     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+     * @param[in]      weights          Weights tensor. A 3D tensor with shape [3, 3, IFM].
+     *                                  Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+     *                                  Data type supported: Same as @p input.
+     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
+     * @param[in]      conv_info        Padding and stride information to use for the convolution.
+     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     */
+    ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer)
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
      *
index 48d6ba843516ed505641cbd877efb552e67e8619..c0a0fcd98800b8746e5e151d193c4964ef4bbc13 100644 (file)
@@ -44,6 +44,14 @@ public:
      * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
+     *                             Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[out] output          Destination tensor with the same dimensions of input. Data type supported: F16/F32.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayer
      *
      * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
index 1155d401ee0c54735795951a5a0a32c4699b1ee3..5875ceb86dca8d885805924864dfffd604f0e83e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,19 @@ public:
      *
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be not NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (Optional) Destination tensor. Derivative along the X direction. Data types supported: S16.
+     * @param[out]    output_y              (Optional) Destination tensor. Derivative along the Y direction. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /* ARM_COMPUTE_CLDERIVATIVE_H */
index ceea4567b2cd2bcb7294ff89827df9f9e2edf3fd..cc84820f9f15b89a237b2b370234c35021400162 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the kernel's inputs, output and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLDILATE_H */
index 045b1c0c9957cf999f89d470008d89f70a57a237..0c81ffa4603a81c16b75a729e25c4183cac4ae34 100644 (file)
@@ -57,6 +57,22 @@ public:
      * @param[in]  act_info  (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs.
+     *                             Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+     * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                             Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
+     * @param[out] output          Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                             Data types supported: Same as @p input.
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayer
      *
      * @param[in] input     Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
index 6632bfce80a7e7b864ff4fd3aa9ff8b1fa5bdd7b..1fed460e69d08521e11e891b731795b25c0a297f 100644 (file)
@@ -98,6 +98,21 @@ public:
      *
      */
     void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info, const WeightsInfo &weights_info = WeightsInfo());
+    /** Set the input, weights, biases and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+     *                                Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+     * @param[in]     weights         The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]     bias            (Optional) The biases have one dimension.
+     *                                Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+     * @param[out]    output          Output tensor. The output has the same number of dimensions as the @p input.
+     * @param[in]     info            Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
+     * @param[in]     weights_info    (Optional) Weights information needed for @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+                   const WeightsInfo &weights_info = WeightsInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLDirectDeconvolutionLayer
      *
      * @param[in] input        Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
index e2503f7bdfd9b2002c6a8aecb051148a2f7663f1..19729b61ccc766d88679d43f306d8cfad157e441 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,13 @@ public:
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLRsqrtLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -60,6 +67,13 @@ public:
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLExpLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -80,6 +94,13 @@ public:
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLNegLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -100,6 +121,13 @@ public:
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSinLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -120,6 +148,13 @@ public:
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLogLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -140,6 +175,13 @@ public:
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLAbsLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -160,6 +202,13 @@ public:
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLRoundLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
index 6d9f3a0e970f9dff8d932567c5fc3dfbb27095e1..8c656ed8bcd8166f3aabfa517aa2ff461e31936c 100644 (file)
@@ -50,6 +50,18 @@ public:
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs are QSYMM16), S16/F16/F32.
+     * @param[in]      policy          Policy to use to handle overflow.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for addition
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
@@ -82,6 +94,18 @@ public:
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+     * @param[in]      policy          Policy to use to handle overflow.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for subtraction
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
@@ -113,6 +137,17 @@ public:
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Same as @p input1.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: Same as @p input1.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticDivision
      *
      * @param[in] input1   First tensor input info. Data types supported: F16/F32.
@@ -143,6 +178,17 @@ public:
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for max
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
@@ -173,6 +219,17 @@ public:
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for min
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
@@ -203,6 +260,17 @@ public:
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for squared difference
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
@@ -233,6 +301,17 @@ public:
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: F16/F32.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for power
      *
      * @param[in] input1   First tensor input info. Data types supported: F16/F32.
index 79c18fae9f7f75e6cf8fe127471d5624ba7d11af..d907cfb0923cf19db0c36bc6dcfef6ed0dd395bd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,13 @@ public:
      * @param[out] output Output of same data type with equalized brightness and contrast.
      */
     void configure(const ICLImage *input, ICLImage *output);
+    /** Initialise the kernel's inputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input image. Data types supported: U8.
+     * @param[out] output          Output of same data type with equalized brightness and contrast.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output);
 
     // Inherited methods overridden:
     void run() override;
index a438f4e1141b0642f63ad64b25ce141c4d7a1960..57f701cce26468dd5e16a90cbf1fd212d49aa98d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the kernel's inputs, output and border mode
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLERODE_H */
index 31e57e13c6b9fbaafe6c0dd160cab4c2d9445461..da153225c865e2511be5cb398c2d49b22a41f962 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,14 @@ public:
      * @param[in]  config FFT related configuration
      */
     void configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config);
+    /** Initialise the function's source, destination and FFT configuration.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F32.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  config          FFT related configuration
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFT1D.
      *
      * @param[in] input  Source tensor info. Data types supported: F32.
index d34528b9cfa2a77a149110b02927abfc21531ba4..a113f20f910b950bffe4ec38e8a2fd4f31304572 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,14 @@ public:
      * @param[in]  config FFT related configuration
      */
     void configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config);
+    /** Initialise the function's source, destination and FFT configuration.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F32.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  config          FFT related configuration
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFT2D.
      *
      * @param[in] input  Source tensor info. Data types supported: F32.
index 34bb93ab54ec3cd4ff3c2d1201bc9aa39ad7ea4b..740731950e55e81f9cd30547092b3cb0774b9494 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -85,6 +85,23 @@ public:
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                    const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Set the input and output tensors.
+     *
+     * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs.
+     *                             Data types supported: F32.
+     * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input
+     * @param[out] output          Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                             Data types supported: Same as @p input.
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTConvolutionLayer
      *
      * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
index 2a0e0104b8c503f29943b18ee22697009ead1435..1dc87d6a38d41501c55bcf0fe19067ae3c298cc9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -71,6 +71,19 @@ public:
      */
     void configure(const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners, unsigned int *num_corners,
                    BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, corners output and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in]     input                 Source image. Data types supported: U8.
+     * @param[in]     threshold             Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+     * @param[in]     nonmax_suppression    If true, non-maximum suppression is applied to detected corners before being placed in the array.
+     * @param[out]    corners               Array of keypoints to store the results.
+     * @param[in,out] num_corners           Record number of corners in the array
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners, unsigned int *num_corners,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
     // Inherited methods overridden:
     void run() override;
 
index c4ba2577539340f8f904acdf6db09853810fa069..bb1216054f2ca1563a6a310b02d241275b0b5652 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,13 @@ public:
      * @param[in]     constant_value Constant value to use to fill tensor.
      */
     void configure(ICLTensor *tensor, PixelValue constant_value);
+    /** Initialize the function
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] tensor          Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in]     constant_value  Constant value to use to fill tensor.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *tensor, PixelValue constant_value);
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLFILL_H */
index ded79e5cb6f78c109031512372340f459e0434a7..250806b1d7f606667a25eb5d22fb1d127b3301a3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,15 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+    /** Initialize the function
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] tensor                Source tensor. Data types supported: U8/S16
+     * @param[in]     border_width          The border width
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
 };
 }
 #endif /*ARM_COMPUTE_FILLBORDER_H */
index b9ce236309f007b38c67c0e7dde4523dca1b90a4..98cf49af486eb317457bdac2fda7041a38ac82e9 100644 (file)
@@ -47,6 +47,15 @@ public:
      *                    w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First input tensor to flatten with at least 3 dimensions.
+     *                             The dimensions above the third will be interpreted as batches. Data types supported: All.
+     * @param[out] output          Output tensor with shape [w*h*d, input_batches] where:
+     *                    w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayer
      *
      * @param[in]  input  First input tensor to flatten with at least 3 dimensions.
index c4a893fdeb15a102185c1cf2930bae9acc1e0cad..2844a5642be55307b231d95e0ac48f4752019424 100644 (file)
@@ -48,7 +48,7 @@ public:
      * @param[in]  input           Source tensor. Data type supported: F16/F32.
      * @param[out] output          Destination tensor. Same as @p input
      */
-    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFloor
      *
      * @param[in] input  Source tensor info. Data type supported: F16/F32.
index cbd28603fc36dfb86304503a6fc8678ac4687e4b..188117f674012196649b32388527d553458c2241 100644 (file)
@@ -52,6 +52,13 @@ public:
      * @param[out] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayerReshapeWeights
      *
      * @param[in] input  Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -100,7 +107,16 @@ public:
      */
     void configure(const ICLTensor *input)
     {
-        _func.configure(input, &_output);
+        configure(CLKernelLibrary::get().get_compile_context(), input);
+    }
+    /** Configures the @ref CLFullyConnectedLayerReshapeWeights function
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input)
+    {
+        _func.configure(compile_context, input, &_output);
     }
 
 private:
@@ -147,6 +163,23 @@ public:
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                    FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  weights         Weights tensor. The weights must be 2 dimensional.
+     *                             If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
+     *                             If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
+     *                             Data type supported: Same as @p input.
+     * @param[in]  biases          Bias tensor. Can be nullptr. Data type supported: Same as @p input.
+     * @param[out] output          Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
+     *                             - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
+     *                             - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
+     *                             Data type supported: Same as @p input.
+     * @param[in]  fc_info         (Optional) Fully connected layer additional info
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                   FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayer
      *
      * @param[in]  input   Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -171,9 +204,9 @@ public:
     void prepare() override;
 
 private:
-    void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
-    void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
-    void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+    void configure_fc_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+    void configure_conv_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+    void configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
 
     MemoryGroup                                                         _memory_group;
     IWeightsManager                                                    *_weights_manager;
index 650d2e528b158bbe41cf3d8644ab5ea3599af952..9057440fc67cc03c7e47b19f283d99a1845b6e75 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -67,6 +67,25 @@ public:
     void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
                    const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
                    float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input_weights   Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
+     * @param[in]  bn_mean         Batch normalization layer mean tensor. Same as @p input_weights
+     * @param[in]  bn_var          Batch normalization layer variance tensor. Same as @p input_weights
+     * @param[out] fused_weights   Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
+     * @param[out] fused_bias      Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+     * @param[in]  input_bias      (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+     * @param[in]  bn_beta         (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+     *                             @note if nullptr, bn_beta is set to 0.0
+     * @param[in]  bn_gamma        (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+     *                             @note if nullptr, bn_gamma is set to 1.0
+     * @param[in]  epsilon         (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+     * @param[in]  fbn_type        (Optional) Fused batch normalization type. Defaults to Convolution.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
+                   const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
+                   float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalization
      *
      * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
index 7a4f12043e78e7d331813669833529ddb46757b5..f5588112ae130e18da79ec71f07a4c44f8584148 100644 (file)
@@ -79,7 +79,18 @@ public:
      */
     void configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
     {
-        _kernel.configure(input, &_output, info);
+        configure(CLKernelLibrary::get().get_compile_context(), input, info);
+    }
+
+    /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in] info            RHS matrix information to be used for reshaping.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info)
+    {
+        _kernel.configure(compile_context, input, &_output, info);
     }
 
 private:
@@ -134,6 +145,26 @@ public:
      *                       in case matrix A and matrix B have been already transformed.
      */
     void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+    /** Initialise the kernel's inputs and output
+     *
+     * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
+     *
+     * @note All tensors must have the same data type.
+     *
+     * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  a               First input tensor  (Matrix or Vector A). Data types supported: F16/F32
+     * @param[in]  b               Second input tensor (Matrix B). Data type supported: same as @p a.
+     * @param[in]  c               Third input tensor  (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
+     * @param[out] output          Output tensor. Data type supported: same as @p a
+     * @param[in]  alpha           Weight of the matrix product
+     * @param[in]  beta            Weight of matrix C
+     * @param[in]  gemm_info       (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
+     *                       if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping
+     *                       in case matrix A and matrix B have been already transformed.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMM.
      *
      * @param[in] a         First input tensor info  (Matrix or Vector A). Data types supported: F16/F32
@@ -156,10 +187,11 @@ public:
 private:
     static CLGEMMKernelType select_gemm_kernel(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool reshape_b_only_on_first_run);
 
-    void configure_native_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    void configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    void configure_reshaped(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    void configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_native_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_reshaped_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_reshaped_v2(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_reshaped_only_rhs(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta,
+                                     const GEMMInfo &gemm_info);
 
     static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
     static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
index 4952029c9d353308cb8c4f37755214b8b0a71199..6d1181eefea7cc529f3245fe77b740bce589a415 100644 (file)
@@ -62,6 +62,16 @@ public:
      * @param[in]  num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
      */
     void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                             Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+     * @param[out] output          Destination tensor. Data types supported: Same as @p weights.
+     * @param[in]  num_groups      (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayerReshapeWeights
      *
      * @param[in] weights    Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
@@ -93,10 +103,21 @@ public:
      * @param[in] num_groups Number of groups when performing a grouped convolution.
      */
     void configure(const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups)
+    {
+        configure(CLKernelLibrary::get().get_compile_context(), input, biases, num_groups);
+    }
+    /** Configures the @ref CLConvolutionLayerReshapeWeights function
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] biases          Biases tensor. Data type supported: Same as @p input.
+     * @param[in] num_groups      Number of groups when performing a grouped convolution.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups)
     {
         _bias_bit   = (biases != nullptr) ? 1 : 0;
         _num_groups = num_groups;
-        _func.configure(input, biases, &_output, num_groups);
+        _func.configure(compile_context, input, biases, &_output, num_groups);
     }
 
     //Inherited method override
@@ -178,6 +199,28 @@ public:
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
                    const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs.
+     *                             Data types supported: QASYMM8/F16/F32.
+     * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                             Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                             Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+     * @param[out] output          Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                             Data types supported: Same as @p input.
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  weights_info    Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+     *                             tensor has also been transposed with CLGEMMReshapeRHSMatrixKernel. Data type supported: Same as @p input.
+     * @param[in]  dilation        (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     * @param[in]  num_groups      (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const WeightsInfo &weights_info = WeightsInfo(),
+                   const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer.
      *
      * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -208,6 +251,7 @@ public:
 private:
     /** Configures the appropriate matrix multiply routine
      *
+     * @param[in]      compile_context       The compile context to be used.
      * @param[in]      input                 Input tensor. Data types supported: QASYMM8/F16/F32.
      * @param[in]      weights               Weights tensor. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
      * @param[in]      biases                Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
@@ -218,8 +262,9 @@ private:
      * @param[in]      gemm_3d_depth         Depth of GEMM 3D
      * @param[in]      act_info              Activation to apply after the matrix multiplication
      */
-    void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth,
-                      const ActivationLayerInfo &act_info);
+    void configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                      const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
+                      int gemm_3d_depth, const ActivationLayerInfo &act_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines
      *
      * @param[in] input                 Input tensor info. Data types supported: QASYMM8/F16/F32.
index 01687b69eca3d31502257822bf28f0e4aa02102e..d8710a461f0c89a07522c68594e4a4a9b295edd8 100644 (file)
@@ -99,6 +99,17 @@ public:
      * @param[in]     deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This function supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info);
+    /** Set the input, weights, biases and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+     *                                Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. Data layout supported: NHWC
+     * @param[in]     weights         The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. Data layout supported: same as @p input.
+     * @param[in]     bias            (Optional) The biases have one dimension. Data type supported: Same as @p input. Data layout supported: same as @p input.
+     * @param[out]    output          Output tensor. The output has the same number of dimensions as the @p input. Data layout supported: same as @p input.
+     * @param[in]     deconv_info     Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This function supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer
      *
      * @param[in] input       Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
index 1d7013d3285a19d411825d743a7d600e8df4d2cd..6ac3cefb7633cf69526bdd7a96d8ee124e26af33 100644 (file)
@@ -72,6 +72,25 @@ public:
      *                       if the reshape of matrix B should be executed only for the first run
      */
     void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info = GEMMInfo());
+    /** Initialise the kernel's inputs, output
+     *
+     * @note GEMMLowp:  low precision GEMM kernel. [A * B + C]
+     *  This kernel performs the following computations:
+     *
+     *  -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
+     *  -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+     *  -# Compute the matrix product of the resulting a * b in int32.
+     *  -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  a               First input tensor  (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
+     * @param[in]  b               Second input tensor (Matrix B). Data type supported: same as @p a
+     * @param[in]  c               Third input tensor  (Matrix C). It can be a nullptr. Data type supported: S32
+     * @param[out] output          Output tensor. Data type supported: S32 or QASYMM8/QASYMM8_SIGNED if gemm_info.gemmlowp_output_stage != NONE
+     * @param[in]  gemm_info       (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
+     *                       if the reshape of matrix B should be executed only for the first run
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info = GEMMInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyCore
      *
      * @param[in] a         First input tensor info (Matrix A). Data type supported: QASYMM8.
index 4c11e519507bf82efdb01983b67018884e692111..06cb759b165f85c06e67b6fc72c4d57069bca37b 100644 (file)
@@ -75,6 +75,23 @@ public:
     ARM_COMPUTE_DEPRECATED_REL(20.05)
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
                    int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
+     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output          Output tensor. Data type supported: QASYMM8
+     * @param[in]  result_offset   Offset to be added to each element of the input matrix
+     * @param[in]  result_mult_int Value to be multiplied to each element of the input matrix once the result_offset has been added
+     * @param[in]  result_shift    Number of bits to shift right the result before converting back to QASYMM8
+     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max             (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
+     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    ARM_COMPUTE_DEPRECATED_REL(20.05)
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -137,6 +154,23 @@ public:
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                    int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context              The compile context to be used.
+     * @param[in]  input                        Input tensor. Data type supported: S32
+     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: QASYMM8
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
+     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
+                   int result_offset_after_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -198,6 +232,23 @@ public:
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                    int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context              The compile context to be used.
+     * @param[in]  input                        Input tensor. Data type supported: S32
+     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
+     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
+                   int result_offset_after_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -240,6 +291,23 @@ public:
     ARM_COMPUTE_DEPRECATED_REL(20.05)
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits<int32_t>::lowest(),
                    int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data type supported: S32
+     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output          Output tensor. Data type supported: QASYMM8
+     * @param[in]  multiplier      Float multiplier to be multiplied to each element of the input matrix
+     * @param[in]  offset          Offset to be applied to result before converting it back to QASYMM8
+     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max             (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
+     *                        Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    ARM_COMPUTE_DEPRECATED_REL(20.05)
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset,
+                   int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -300,6 +368,21 @@ public:
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
                    int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context              The compile context to be used.
+     * @param[in]  input                        Input tensor. Data type supported: S32
+     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: QSYMM16
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
      *
      * @param[in] input  Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -336,6 +419,16 @@ public:
      * @param[in]  info   GEMMLowp output stage metadata.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data type supported: S32
+     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output          Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  info            GEMMLowp output stage metadata.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
index 71843aa42a718cc39a7def388ee548513ddc3643..dcd9efc6e09d09d292b9e8c43858414e0a97a834 100644 (file)
@@ -43,6 +43,15 @@ public:
      * @param[in]  axis    (Optional) The axis in @p input to gather @p indices from. Defaults to 0
      */
     void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+    /** Initialise the kernel's inputs and outputs
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Supported tensor rank: up to 4. Data type supported: All.
+     * @param[in]  indices         Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+     * @param[out] output          Destination tensor. Data type supported: Same as @p input
+     * @param[in]  axis            (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel
      *
index 2caf6c9d74ebf7fffdc980ddaa5be09f4f86f082..f1906cde929fa74430a809c0636378624e6d565c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLGAUSSIAN3X3_H */
index 5d121a44881f6b1c93b2ae1595c1cfd389ebfec6..d4ed77234233d36472cbfe23f42e676285dc0819 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,6 +62,15 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     void run() override;
index aa90a5d4e3c86d6f6e84af2c6d54d95f2907c5fb..a75a4d10282288bdfb91483887d7b6474e032835 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,16 @@ public:
      *
      */
     virtual void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0;
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]      compile_context       The compile context to be used.
+     * @param[in, out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     pyramid               Destination pyramid tensors, Data types supported at each level: U8.
+     * @param[in]      border_mode           Border mode to use.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    virtual void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0;
 
 protected:
     ICLTensor *_input;
@@ -86,6 +96,7 @@ public:
 
     // Inherited methods overridden:
     void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
     void run() override;
 
 private:
@@ -109,6 +120,7 @@ public:
 
     // Inherited methods overridden:
     void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
     void run() override;
 
 private:
index fb6967f7e528be92850f3a0ac1a55a806d034ae4..91b30fabcb9d60ad6af440c100471067745766da 100644 (file)
@@ -85,6 +85,24 @@ public:
      */
     void configure(const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, ICLTensor *num_valid_proposals,
                    const GenerateProposalsInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context     The compile context to be used.
+     * @param[in]  scores              Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors.
+     *                                 Data types supported: QASYMM8/F16/F32
+     * @param[in]  deltas              Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores
+     * @param[in]  anchors             Anchors tensor of size (4, A). Data types supported: QSYMM16 with scale of 0.125 if @p scores is QASYMM8, otherwise same as @p scores
+     * @param[out] proposals           Box proposals output tensor of size (5, W*H*A).
+     *                                 Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p scores is QASYMM8, otherwise same as @p scores
+     * @param[out] scores_out          Box scores output tensor of size (W*H*A). Data types supported: Same as @p scores
+     * @param[out] num_valid_proposals Scalar output tensor which says which of the first proposals are valid. Data types supported: U32
+     * @param[in]  info                Contains GenerateProposals operation information described in @ref GenerateProposalsInfo
+     *
+     * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct.
+     * @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out,
+                   ICLTensor *num_valid_proposals, const GenerateProposalsInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLGenerateProposalsLayer
      *
index 3214e8c3f1e0ae5b5b8d413778d684cd730a5771..71280c898ad8fff2067dc8df3b04bc8682c1890e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,17 @@ public:
      * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destination, HOG data-object and border mode
+     *
+     * @param[in]      compile_context       The compile context to be used.
+     * @param[in, out] input                 Input tensor. Data type supported: U8
+     *                                       (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block
+     * @param[in]      hog                   HOG data object which describes the HOG descriptor
+     * @param[in]      border_mode           Border mode to use.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited method overridden:
     void run() override;
index 6703de9f3586862dea2a8066169f84df9e639e7a..c2bdc15b35ecbe4be8db23b57766f806dbb20c07 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -64,6 +64,22 @@ public:
      * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
      */
     void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
+    /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
+     *
+     * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is the caller's responsibility to clear it.
+     *
+     * @param[in]  compile_context         The compile context to be used.
+     * @param[in]  input                   Input tensor. It is the output of @ref CLHOGDescriptor. Data type supported: F32
+     * @param[in]  hog                     HOG data-object that describes the HOG descriptor
+     * @param[out] detection_windows       Array of @ref DetectionWindow used to store the detected objects
+     * @param[in]  detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+     *                                     It must be multiple of the block stride stored in hog
+     * @param[in]  threshold               (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride,
+                   float threshold = 0.0f,
+                   size_t idx_class = 0);
 
     // Inherited methods overridden:
     void run() override;
index ec4a18786414a24dc5aa1f458a143591845abef5..450a4a6045a3033aad973649c392ccbbf0c0cccd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,6 +61,19 @@ public:
      * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations, phase type and border mode
+     *
+     * @param[in]      compile_context       The compile context to be used.
+     * @param[in, out] input                 Input tensor. Data type supported: U8.
+     *                                       (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output_magnitude      Output tensor (magnitude). Data type supported: U16.
+     * @param[out]     output_phase          Output tensor (phase). Format supported: U8
+     * @param[in]      phase_type            Type of @ref PhaseType
+     * @param[in]      border_mode           Border mode to use
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 
     // Inherited method overridden:
     void run() override;
index 424c69dad88091bfea4e40c5d61ac924b9cd2af8..3d22ff69ee6ca49bd70edaf72ba3630ec9b31e50 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -82,8 +82,29 @@ public:
      *
      */
     void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
-                   uint8_t constant_border_value = 0,
-                   float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+                   uint8_t constant_border_value = 0, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+    /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
+     *
+     * @param[in]      compile_context          The compile context to be used.
+     * @param[in, out] input                    Input tensor. Data type supported: U8
+     *                                          (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]      multi_hog                Container of multiple HOG data object. Each HOG data object describes one HOG model to detect.
+     *                                          This container should store the HOG data-objects in descending or ascending cell_size width order.
+     *                                          This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects
+     * @param[out]     detection_windows        Array of @ref DetectionWindow used for locating the detected objects
+     * @param[in]      detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
+     *                                          The dimension of this array must be the same of multi_hog->num_models()
+     *                                          The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array
+     * @param[in]      border_mode              Border mode to use.
+     * @param[in]      constant_border_value    (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]      threshold                (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]      non_maxima_suppression   (Optional) Flag to specify whether the non-maxima suppression is required or not.
+     *                                          True if the non-maxima suppression stage has to be computed
+     * @param[in]      min_distance             (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides,
+                   BorderMode border_mode, uint8_t constant_border_value = 0, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
 
     // Inherited method overridden:
     void run() override;
index 6c89d6dea6339f5897b0fc2fd0cffd2374243cc2..2d0e78b00ecd0f696d3df2cf956e117dee84165f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -82,6 +82,23 @@ public:
     void configure(ICLImage *input, float threshold, float min_dist, float sensitivity,
                    int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
                    BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]     threshold             Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+     * @param[in]     min_dist              Radial Euclidean distance for the euclidean distance stage.
+     * @param[in]     sensitivity           Sensitivity threshold k from the Harris-Stephens equation
+     * @param[in]     gradient_size         The gradient window size to use on the input. The implementation supports 3, 5, and 7
+     * @param[in]     block_size            The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
+     * @param[out]    corners               Array of keypoints to store the results.
+     * @param[in]     border_mode           Border mode to use
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]     use_fp16              (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
+     */
+    void configure(const CLCompileContext &compile_context, ICLImage *input, float threshold, float min_dist, float sensitivity,
+                   int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
+                   BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false);
 
     // Inherited methods overridden:
     void run() override;
index ad389248f735aa4711191b395533ca0959ff8be8..6d34dd7060ca0ad7f24ee32507672ea106c85317 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,13 @@ public:
      * @param[out] output Output distribution.
      */
     void configure(const ICLImage *input, ICLDistribution1D *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source image. Data types supported: U8
+     * @param[out] output          Output distribution.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
 
     // Inherited methods overridden:
     void run() override;
index ddd4b12ecac9d602aaf944c59667e5280091a10d..4614b90c703605d4427727ac496e57c53808627f 100644 (file)
@@ -51,6 +51,18 @@ public:
      * @param[in]      use_mixed_precision (Optional) Use mixed precision in case of FP16 execution
      */
     void configure(ICLTensor *input, ICLTensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f, bool use_mixed_precision = true);
+    /** Set the input and output tensors.
+     *
+     * @param[in]      compile_context     The compile context to be used.
+     * @param[in, out] input               Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+     *                                     Data types supported: F16/F32. Data layout supported: NHWC, NCHW
+     * @param[out]     output              Destination tensor. Data types and data layouts supported: same as @p input.
+     * @param[in]      gamma               (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
+     * @param[in]      beta                (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
+     * @param[in]      epsilon             (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in]      use_mixed_precision (Optional) Use mixed precision in case of FP16 execution
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f, bool use_mixed_precision = true);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
      *
index 2a452a97a3d18091c43a648e8009d915f2183192..1ea189bf3399f36871144570e04e1bd44d193de9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,6 +48,13 @@ public:
      * @param[out] output Destination tensor, Data types supported: U32.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor, Data types supported: U32.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 
     // Inherited methods overridden:
     void run() override;
index e200dc758ec67f00b313cd0c22e9b354df3faf3c..91c547b2cc6dae686656cd8c4299062ddcfc9860 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,15 @@ public:
      * @param[in]  epsilon (Optional) Lower bound value for the normalization.
      */
     void configure(ICLTensor *input, ICLTensor *output, int axis, float epsilon = 1e-12f);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  axis            Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
+     * @param[in]  epsilon         (Optional) Lower bound value for the normalization.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int axis, float epsilon = 1e-12f);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayer.
      *
index a94f239472e5d4ff19814c8f9be66051b0c84b43..a29513aaae6c965ff36b4b92e7fc99441d4a4961 100644 (file)
@@ -102,6 +102,52 @@ public:
                    const ICLTensor *output_state_in, const ICLTensor *cell_state_in,
                    ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
                    const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
+    /** Initialize function's tensors.
+     *
+     * @param[in]  compile_context             The compile context to be used.
+     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
+     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     * @param[in]  output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
+     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
+     * @param[out] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG. Data type supported: Same as @p input.
+     * @param[out] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
+     * @param[out] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
+     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
+     *                                         Data types supported: Same as @p input.
+     * @param[in]  lstm_params                 Weights tensors used in peephole optimization:
+     *                                         input_to_input_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
+     *                                         recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     *                                         cell_to_input_weights      1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
+     *                                         cell_to_forget_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         cell_to_output_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         input_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input
+     *                                         projection_weights         2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     *                                         projection_bias            1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
+     *                                         input_layer_norm_weights   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         forget_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         cell_layer_norm_weights    1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         output_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     * @param[in]  activation_info             Contains activation information described in @ref ActivationLayerInfo.
+     * @param[in]  cell_threshold              (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
+     *                                         If set to 0.0f then clipping is disabled.
+     * @param[in]  projection_threshold        (Optional) The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
+     *                                         If set to 0.0f then clipping is disabled.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                   const ICLTensor *output_state_in, const ICLTensor *cell_state_in,
+                   ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
+                   const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayer
      *
index 1d390600885b20b184e091be0c615565ce79a4d4..082fdb44997705c537273d64b254aaf1b9942088 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -97,6 +97,33 @@ public:
                    const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                    ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                    ICLTensor *cell_state_out, ICLTensor *output_state_out);
+    /** Initialize function's tensors.
+     *
+     * @param[in]  compile_context             The compile context to be used.
+     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
+     * @param[in]  input_to_input_weights      2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_input_weights  2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  input_gate_bias             1D weights tensor with dimensions [output_size]. Data type supported: S32.
+     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
+     * @param[in]  cell_bias                   1D weights tensor with dimensions [output_size]. Data type supported: S32.
+     * @param[in]  output_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
+     * @param[in]  cell_state_in               2D tensor with dimensions [output_size, batch_size]. Data type supported:  QSYMM16.
+     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
+     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported:  QSYMM16.
+     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                   const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                   const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                   const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                   ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                   ICLTensor *cell_state_out, ICLTensor *output_state_out);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayerQuantized
      *
index a407e981da4c78ddb65a6a00d08abacd28df0aec..49a87baaf284a4c65ebc43476e4639e84e6779b4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,6 +68,19 @@ public:
      *
      */
     void configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]  compile_context       The compile context to be used.
+     * @param[in]  input                 Source tensor. Data types supported: U8.
+     * @param[out] pyramid               Destination pyramid tensors, Data types supported at each level: S16.
+     * @param[out] output                The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data types supported: S16.
+     *                                   The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is:
+     *                                   output.width = input.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1)
+     * @param[in]  border_mode           Border mode to use.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
 
     // Inherited methods overridden:
     void run() override;
index 3407f46887cba898e339987d292abb43aa98ff06..2c7afde7de37c2d2f20cae41fecf88a650f61771 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,6 +77,22 @@ public:
      *
      */
     void configure(const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * The Output image must have the same size as the first level of the pyramid.
+     * The Input image must have the same size as the last level of the pyramid.
+     *
+     * The idea is to reconstruct the original hi-res image from a low-res representation of it and the laplacian pyramid.
+     *
+     * @param[in]  compile_context       The compile context to be used.
+     * @param[in]  pyramid               Laplacian pyramid tensors, Data types supported at each level: S16.
+     * @param[in]  input                 Source tensor. Data types supported: S16.
+     * @param[out] output                Output tensor. Data types supported: U8.
+     * @param[in]  border_mode           Border mode to use for the convolution.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
 
     // Inherited methods overridden:
     void run() override;
index 1186a449d58b47170c07ea125e9e2bc86c377f16..7a43eab4780e5d064775dcca35f81614ae7d1b26 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -73,6 +73,19 @@ public:
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs.
+     *                             Data types supported: F32.
+     * @param[in]  weights         Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[out] output          Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                             Data types supported: Same as @p input.
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedLayer
      *
      * @param[in] input     Input tensor info. 3 lower dimensions represent a single input [width, height, IFM],
index 2f5932b5aba6bc83af2928327dee7894fe45b021..e52ab240e4458ab34382815136b214d3ee90905e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,15 @@ public:
      * @param[in]  mag_type (Optional) Magnitude calculation type. Default: L2NORM.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
+    /** Initialise the kernel's inputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First tensor input. Data types supported: S16.
+     * @param[in]  input2          Second tensor input. Data types supported: S16.
+     * @param[out] output          Output tensor. Data types supported: S16.
+     * @param[in]  mag_type        (Optional) Magnitude calculation type. Default: L2NORM.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
 };
 }
 #endif /*ARM_COMPUTE_CLMAGNITUDE_H */
index fea1ed194f8ab5c03d7c7c19b0994a01f3faefae..561ac04f1de540acb1c5b8cb6ac969d1715a732f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,14 @@ public:
      * @param[out]     stddev (Optional) Output standard deviation of pixel values.
      */
     void configure(ICLImage *input, float *mean, float *stddev = nullptr);
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Input image. Data types supported: U8/F16/F32. (Written to only for border filling)
+     * @param[out]     mean            Output average pixel value.
+     * @param[out]     stddev          (Optional) Output standard deviation of pixel values.
+     */
+    void configure(const CLCompileContext &compile_context, ICLImage *input, float *mean, float *stddev = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDev
      *
      * @param[in] input  Input image. Data types supported: U8/F16/F32.
index 565f8f304040fe53231f259ec94a6e9637c94d51..e39a5908b8b1d7a3ea0d550a9ebb66bf8847c9a2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,16 @@ public:
      * @param[in]      epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
      */
     void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
+    /** Initialise the function's input and outputs.
+     *
+     * @note If the output tensor is a nullptr, the normalization will be performed in-place.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Input tensor with 2 dimensions. Data types supported: F16/F32.
+     * @param[out]     output          (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
+     * @param[in]      epsilon         (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
     /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel
      *
      * @param[in] input   Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
index 3a9a95a5f3351c4eb3b42b3d323f558ff385f5a6..f3bb2832ef7f1aecd1659f47a2d20f3514640518 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor, Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLMEDIAN3X3_H */
index 30a29f2b8ca546087874e7f07b8ab15ae6b5de4e..e9e3bd910cdeb1a51029e0524a6ff7c8d49a86bb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -66,6 +66,22 @@ public:
     void configure(const ICLImage *input, void *min, void *max,
                    CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr,
                    uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input image. Data types supported: U8/S16/F32.
+     * @param[out] min             Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
+     * @param[out] max             Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
+     * @param[out] min_loc         (Optional) Array of Coordinates2D used to store minimum value locations.
+     * @param[out] max_loc         (Optional) Array of Coordinates2D used to store maximum value locations.
+     * @param[out] min_count       (Optional) Number of minimum value encounters.
+     * @param[out] max_count       (Optional) Number of maximum value encounters.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, void *min, void *max,
+                   CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr,
+                   uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
 
     // Inherited methods overridden:
     void run() override;
index a7c87d35b0d5c24c4f11dda20d29c69fe8b5de4b..79f73ea370e6b924abea335af1126ca5ea0026bc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,20 @@ public:
      */
     void configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
                    BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8
+     * @param[in]     function              Non linear function to perform
+     * @param[in]     mask_size             Mask size. Supported sizes: 3, 5
+     * @param[in]     pattern               Mask pattern
+     * @param[in]     mask                  The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLNONLINEARFILTER_H */
index 0859a09bdb24c6e1cb8754b5ed7d2fd08a3f8197..e2c0c4f8149dd8afb8edaa88d4b63770801827a3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,18 @@ public:
      *                                   The implementation supports just 2 border modes: UNDEFINED and CONSTANT
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT
+     *       The constant values used with CONSTANT border mode is 0
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Source tensor. Data types supported: U8, F32. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output          Destination for the Non-Maxima suppressions 3x3. Data types supported: same as @p input.
+     * @param[in]     border_mode     Border mode to use for non-maxima suppression.
+     *                                   The implementation supports just 2 border modes: UNDEFINED and CONSTANT
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode);
 };
 }
 #endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H */
index d06bf5679486ac490042504b4a5a26b32ecfbcd6..07bb62c7d70f36d88b7ecc37509dcf7785beeff4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,17 @@ public:
      * @param[in]      norm_info Normalization layer information like the normalization type, normalization size and other parameters.
      */
     void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                                 and an optional 4th dimension for batch of inputs. Data types supported: F16/F32 (Written to by the border handler).
+     *                                 Data layouts supported: NCHW/NHWC.
+     * @param[out]     output          Destination tensor. Dimensions, data type and number of channels must match the input ones.
+     *                                 Data types supported: same as @p input. Data layouts supported: same as @p input.
+     * @param[in]      norm_info       Normalization layer information like the normalization type, normalization size and other parameters.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayer
      *
      * @param[in] input     Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
index 5fbfdd18b7e8d4f6ec62428f7afada9d40a82493..5dd3760d3d267fe294a738d380f70585c8e0e694 100644 (file)
@@ -50,6 +50,17 @@ public:
      *                    Data types supported: Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
+     *                             Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination feature tensor. Data type supported: same as @p input
+     * @param[in]  mean            Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: Same as @p input
+     * @param[in]  std             Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
+     *                             Data types supported: Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
     /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayer
      *
      * @param[in]  input  Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
index 33df1752873fe29d605ddef36460bd1f9f64ae5a..12d0583384b81b3725c3181e86cea7bb7cffab36 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -91,6 +91,27 @@ public:
                    const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
                    Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
                    BorderMode border_mode, uint8_t constant_border_value = 0);
+    /**  Initialise the function input and output
+     *
+     * @param[in]  compile_context       The compile context to be used.
+     * @param[in]  old_pyramid           Pointer to the pyramid for the old tensor. Data types supported U8
+     * @param[in]  new_pyramid           Pointer to the pyramid for the new tensor. Data types supported U8
+     * @param[in]  old_points            Pointer to the IKeyPointArray storing old key points
+     * @param[in]  new_points_estimates  Pointer to the IKeyPointArray storing new estimates key points
+     * @param[out] new_points            Pointer to the IKeyPointArray storing new key points
+     * @param[in]  termination           The criteria to terminate the search of each keypoint.
+     * @param[in]  epsilon               The error for terminating the algorithm
+     * @param[in]  num_iterations        The maximum number of iterations before terminating the algorithm
+     * @param[in]  window_dimension      The size of the window on which to perform the algorithm
+     * @param[in]  use_initial_estimate  The flag to indicate whether the initial estimated position should be used
+     * @param[in]  border_mode           The border mode applied at scharr kernel stage
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT
+     *
+     */
+    void configure(const CLCompileContext &compile_context, const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
+                   const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
+                   Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     void run() override;
index 7f8a41238c86ff1983dc42895bb730d971f76d9f..74fa86a3202e9a39d65f35a864ec12a7171bace5 100644 (file)
@@ -47,6 +47,16 @@ public:
      * @param[out] output Destination tensor. Data type supported: same as @p input
      */
     void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  alpha           PRelu layer parameters. Data types supported: same of @p input.
+     * @param[out] output          Destination tensor. Data type supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPReluLayer
      *
      * @param[in] input  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
index f020d68c92c9ba79d14585cf4b5ea8fbecde240e..82d7205381016f479344b9bb1420fbc8999908db 100644 (file)
@@ -63,6 +63,19 @@ public:
      *                            or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
      */
     void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: same as @p input
+     * @param[in]  padding         The padding for each spatial dimension of the input tensor. The pair padding[i]
+     *                             specifies the front and the end padding in the i-th dimension.
+     * @param[in]  constant_value  (Optional) Constant value to be used for the padding.
+     * @param[in]  mode            (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
+     *                            or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(),
+                   PaddingMode mode = PaddingMode::CONSTANT);
 
     /**  Static function to check if given info will lead to a valid configuration of @ref CLPadLayer.
      *
index b1705cf4c551455853969dbd5c39925e9b61dea7..37e651cfbb07787eec73ad1cb9c6edd4f4e91090 100644 (file)
@@ -46,6 +46,16 @@ public:
      * @param[in] perm   Permutation vector
      */
     void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
+    /** Set the input and output tensors.
+     *
+     * @note Arbitrary permutation vectors are supported with rank not greater than 4
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           The input tensor to permute. Data types supported: All.
+     * @param[out] output         The output tensor. Data types supported: Same as @p input
+     * @param[in] perm            Permutation vector
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLPermute.
      *
      * @note Arbitrary permutation vectors are supported with rank not greater than 4
index ac8a8670fc2800d19be25af1a3c5fb0c2fc7e3f9..f993906fe2b6cbf355d26aa719ff6587a1500b6f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,15 @@ public:
      * @param[in]  phase_type (Optional) Phase calculation type. Default: SIGNED.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED);
+    /** Initialise the kernel's inputs, output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First tensor input. Data types supported: S16.
+     * @param[in]  input2          Second tensor input. Data types supported: S16.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     * @param[in]  phase_type      (Optional) Phase calculation type. Default: SIGNED.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED);
 };
 }
 #endif /*ARM_COMPUTE_CLPHASE_H */
index 47bb2bf4db02f3aa92460fb0929692ad1fd2b2f0..8b0ee70f12c2e3773bd28e17216abd4aea7c1e13 100644 (file)
@@ -59,6 +59,22 @@ public:
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale,
                    ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          An input tensor. Data types supported: same as @p input1.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
+     * @param[in]      scale           Scale to apply after multiplication.
+     *                                 Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+     * @param[in]      overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+     * @param[in]      rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale,
+                   ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication
      *
      * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
@@ -98,6 +114,17 @@ public:
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          An input tensor. Data types supported: F32. Number of channels supported: 2.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplication
      *
      * @param[in] input1   An input tensor info. Data types supported: F32. Number of channels supported: 2.
index 05b35dcee84da45cb61242142891718f9b837277..7d646ab26811193fc1edb04c7b573e4fea7cbbd8 100644 (file)
@@ -49,6 +49,15 @@ public:
      * @param[out]    indices   (optional) The indices of the maximal values. Data type supported: U32.
      */
     void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
+    /** Set the input and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out]    output          Destination tensor. Data types supported: Same as @p input.
+     * @param[in]     pool_info       Contains pooling operation information described in @ref PoolingLayerInfo.
+     * @param[out]    indices         (optional) The indices of the maximal values. Data type supported: U32.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayer
      *
      * @param[in] input     Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
index eea1399552d7dd94d4ce61e921da11e32bb7ece6..d39e4112f9e9d5d44623b8925ccdd374887a5f53 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,6 +46,15 @@ public:
      * @param[in]  info   Prior box layer info.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+     * @param[in]  input2          Second source tensor. Data types and layouts supported: same as @p input1
+     * @param[out] output          Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
+     * @param[in]  info            Prior box layer info.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayer
      *
      * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
index ab34135ff50f06b5a983e60ddcc28e2c3a00496b..72a61f85050903c2da8e4ad28f95d4f6d2065143 100644 (file)
@@ -113,6 +113,55 @@ public:
                    ICLTensor *cell_state_out, ICLTensor *output_state_out,
                    const LSTMParams<ICLTensor> &lstm_params);
 
+    /** Initialize function's tensors.
+     *
+     * @param[in]  compile_context             The compile context to be used.
+     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
+     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
+     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: S32.
+     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
+     * @param[in]  cell_state_in               2D tensor with dimensions [output_size, batch_size]. Data type supported:  QSYMM16.
+     * @param[in]  output_state_in             2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
+     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported:  QSYMM16.
+     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data types supported: Same as @p input.
+     * @param[in]  lstm_params                 Weights tensors used in peephole, CIFG and layer normalization optimizations:
+     *                                         input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
+     *                                         forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
+     *                                         cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
+     *                                         output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
+     *                                         hidden_state_zero          The zero point of the hidden state.
+     *                                         hidden_state_scale         The scale of the hidden state.
+     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
+     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
+     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
+     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. S32.
+     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
+     *                                                                               If set to 0.0 then clipping is disabled.
+     *                                         projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
+     *                                                                               [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                   const ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                   ICLTensor *cell_state_out, ICLTensor *output_state_out,
+                   const LSTMParams<ICLTensor> &lstm_params);
+
     /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer
      *
      * @param[in]  input                       Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
@@ -169,19 +218,20 @@ public:
 private:
     /** Internal method to configure matrix multiplication plus output stage of each gate.
      *
-     * @param[in] mm             Matrix multiplication function to use.
-     * @param[in] outstage       Output stage function to use.
-     * @param[in] gemmlowp_info  GEMMLowp metadata to be used by the output stage.
-     * @param[in] mm_input       Input tensor to matrix multiplication function.
-     * @param[in] mm_weights     Weights tensor to matrix multiplication function.
-     * @param[in] bias           Bias tensor to matrix multiplication function.
-     * @param[in] outstage_res   Tensor to be used for storing the result of the output stage.
-     * @param[in] gemmlowp_scale Real multiplier to be used computing multiplier and shift for requantization.
-     * @param[in] mm_res_info    Tensor info to be used to initialize matrix multiplication result tensor.
-     * @param[in] mm_res_info    Tensor info to be used to initialize output stage result tensor.
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] mm              Matrix multiplication function to use.
+     * @param[in] outstage        Output stage function to use.
+     * @param[in] gemmlowp_info   GEMMLowp metadata to be used by the output stage.
+     * @param[in] mm_input        Input tensor to matrix multiplication function.
+     * @param[in] mm_weights      Weights tensor to matrix multiplication function.
+     * @param[in] bias            Bias tensor to matrix multiplication function.
+     * @param[in] outstage_res    Tensor to be used for storing the result of the output stage.
+     * @param[in] gemmlowp_scale  Real multiplier to be used computing multiplier and shift for requantization.
+     * @param[in] mm_res_info     Tensor info to be used to initialize matrix multiplication result tensor.
+     * @param[in] outstage_tensor_info Tensor info to be used to initialize output stage result tensor.
      *
      */
-    void configure_mm(CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
+    void configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
                       const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias, CLTensor *mm_res,
                       CLTensor *outstage_res, float gemmlowp_scale,
                       const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);
index fbdef53aebcf9a70464da9b33090c4faa591dc8e..f59e3b7919cfa124e50fb0f05ff42a659c845b6b 100644 (file)
@@ -48,6 +48,15 @@ public:
      * @note Output auto initialization is not supported by this function
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+     *
+     * @note Output auto initialization is not supported by this function
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayer
      *
      * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.
index 569e3da89e9f40a5ecbc7878def2df9b82c312a9..0291eb17a9451bc0700d7d771bf9be3aa651086d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -52,6 +52,19 @@ public:
      * @param[in]     info              Activation layer parameter.
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state, ICLTensor *output, ActivationLayerInfo &info);
+    /** Initialize the function
+     *
+     * @param[in]     compile_context   The compile context to be used.
+     * @param[in]     input             Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
+     * @param[in]     weights           Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input
+     * @param[in]     recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input
+     * @param[in]     bias              Bias vector of shape [num_units]. Data types supported: Same as @p input
+     * @param[out]    output            Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input
+     * @param[in,out] hidden_state      Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input
+     * @param[in]     info              Activation layer parameter.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state,
+                   ICLTensor *output, ActivationLayerInfo &info);
     /** Initialize the function
      *
      * @param[in] input             Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
index 7c2c6eb26f3ae1a2852401392fee2843816c8a45..b6defe6c7fe2ad8c7f243ef6db146f20d7f408d0 100644 (file)
@@ -56,6 +56,22 @@ public:
      * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
      */
     void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  rois            ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+     *                             as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
+     *                             Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
+     * @param[out] output          Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  pool_info       Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+     *
+     * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+     * width and pooled height.
+     * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+     * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayer
      *
      * @param[in] input     Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
index 7d0e1da4f84819f6cacd4533e6b10f43bd9bdfdc..0376e7847cb3e94e3e03e6a69a597b2abfb961ff 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,21 @@ public:
      * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
      */
     void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F16/F32.
+     * @param[in]  rois            ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+     *                             as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
+     * @param[out] output          Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  pool_info       Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+     *
+     * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+     * width and pooled height.
+     * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+     * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
 };
 }
 #endif /* ARM_COMPUTE_CLROIPOOLINGLAYER_H */
index 2cc8376b72a8c2a85377722a47e8e21356d0782d..19e11bacd4ca1d6c3f8dfa1e145b4418d21babce 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,6 +47,15 @@ public:
      * @param[in]  step   The gap between each pair of values in the sequence. Default is 1.
      */
     void configure(ICLTensor *output, float start, float end, float step = 1.f);
+    /** Initialize the kernel's start, end, step and output tensor.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[out] output          Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  start           The starting value of the sequence.
+     * @param[in]  end             The ending (not including) value of the sequence.
+     * @param[in]  step            The gap between each pair of values in the sequence. Default is 1.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step = 1.f);
     /** Static function to check if given info will lead to a valid configuration of @ref CLRange
      *
      * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
index 30000edd62fe36910458aff62de6d1f3625c6860..57ec48d690018bee3b385b2307ad5b5a8b02d251 100644 (file)
@@ -51,6 +51,17 @@ public:
      * @param[out] output         Destination tensor. Data type supported: Same as @p input
      */
     void configure(ICLTensor *input, const Coordinates &reduction_axis, bool keep_dims, ICLTensor *output);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+     * @param[in]  reduction_axis  Reduction axis vector.
+     * @param[in]  keep_dims       If positive, retains reduced dimensions with length 1.
+     * @param[out] output          Destination tensor. Data type supported: Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const Coordinates &reduction_axis, bool keep_dims, ICLTensor *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReduceMean
      *
index 254c7309fd04fa454ec784b8528899e3acf92857..25cf655802cff1231cfe93f925e539877a6ed12a 100644 (file)
@@ -61,6 +61,16 @@ public:
      * @param[in]  keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
      */
     void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  axis            Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
+     * @param[in]  op              Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
+     * @param[in]  keep_dims       (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation.
      *
index f035ac902c514f04b67ca5f51889a043cbc3ca60..dc8a2c4ecf361571ffd788784343b11746504aa2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,20 @@ public:
      */
     void configure(ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output,
                    InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's sources, destination, interpolation policy and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]     map_x                 Map for X coords. Data types supported: F32.
+     * @param[in]     map_y                 Map for Y coords. Data types supported: F32.
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     policy                Interpolation policy to use. Only NEAREST and BILINEAR are supported.
+     * @param[in]     border_mode           Border mode to use on the input tensor.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output,
+                   InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLREMAP_H */
index dd08c0f1fc45c5891e5485a85a9c5a73444ecd47..8b245ab44195f3e7091a95fa255ed5e92d11e086 100644 (file)
@@ -45,6 +45,18 @@ public:
      *
      */
     void configure(ICLTensor *input, ICLTensor *output, int32_t stride);
+    /** Initialise the function's source and destination.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: All.
+     * @param[out] output          Destination tensor with tensor shape:
+     *                             [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
+     *                             the same number of input elements. Data types supported: same as @p input.
+     * @param[in]  stride          Stride value to use for reorganizing the values in the output tensor.
+     *                             It defines the spatial distance between 2 consecutive pixels in the x and y direction
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t stride);
     /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayer
      *
      * @param[in] input  Source tensor. Data types supported: All.
index 63fe5457a3c218dd142f3fcff118ebf92b3e2cbb..e91c2c739b4029c5b5108eec9c17459df183a65e 100644 (file)
@@ -40,6 +40,13 @@ public:
      * @param[out] output Output tensor. Data type supported: Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's inputs and outputs
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First tensor input. Data type supported: All
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayer
      *
index f87bd19a90117670048750c1f8507701c9128516..87ae34c89d9e9091bb7cfa9fffd0d46507cb21f9 100644 (file)
@@ -41,6 +41,14 @@ public:
      * @param[in]  axis   Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
      */
     void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     * @param[in]  axis            Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
     /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel
      *
      * @param[in] input  Input tensor info. Data types supported: All.
index c06c9b629acbd810b2ac54fe87ac9eb0c2a08e35..f345995138750a28b242c0ca01ee44c6434fdc3c 100644 (file)
@@ -51,6 +51,21 @@ public:
      */
     void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
                    SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
+    /** Initialize the function's source, destination, interpolation type and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: Same as @p input
+     *                                      All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]     sampling_policy       (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+     * @param[in]     use_padding           (Optional) Is padding in use or not. Defaults to true.
+     * @param[in]     align_corners         (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
+                   SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLScale
      *
index 708243260cb2f92f60a7013812718d5c2d3df16d..b25b548eaa195932901333bde7861fcdf39a772e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,18 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be not NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Scharr 3x3 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLSCHARR3X3_H */
index a7e06e4eec600b01fe2f38474d25ca3806cec4de..84d09971498f4db18ae64af36de6b4dc8827ee78 100644 (file)
@@ -45,6 +45,15 @@ public:
      * @param[out] output Output tensor. Data types supported: Same as @p x.
      */
     void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  c               Condition input tensor. Data types supported: U8.
+     * @param[in]  x               First input tensor. Data types supported: All.
+     * @param[in]  y               Second input tensor. Data types supported: Same as @p x
+     * @param[out] output          Output tensor. Data types supported: Same as @p x.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSelect
      *
      * @param[in] c      Condition input tensor. Data types supported: U8.
index f5fca4387444b43fc358d5d4eeaac5ba18cc69cc..a8c6e1ff0bf72b04b56c04144d40f7c644fb5038 100644 (file)
@@ -48,6 +48,20 @@ public:
      * @param[in]  ends   The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
      */
     void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     * @note Start indices must be non-negative. 0 <= starts[i]
+     * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
+     * @note End indices are not inclusive unless negative.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: All.
+     * @param[out] output          Destination tensor. Data type supported: Same as @p input
+     * @param[in]  starts          The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  ends            The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLSlice
      *
index 2f4cf50465468636f3fb6969349fcfdf402459e1..24bc0cda4344905c683dd096f8eeb88a22771fe8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,18 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be not NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Sobel 3x3 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLSOBEL3X3_H */
index 2a9136b92e9604080664872bfbfb3ecad39c64fa..bf266270c32d0d04f319f323fa9c2eeb150b401b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,18 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be not NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Sobel 5x5 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     void run() override;
index e3188b85f59dbbfbf16b518e99f245932fb92297..13932c704adf6bfd31348ee4cdf2da80af84f67e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,18 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be not NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Sobel 7x7 convolution along the X axis. Data types supported: S32.
+     * @param[out]    output_y              (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data types supported: S32.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     void run() override;
index 751b68d0cf26f23a6e92db9ea45c98de983c067c..fadbc430e68707cab9f4b85097eb28b2384c4977 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -67,6 +67,17 @@ public:
      *                    when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
      */
     void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 1);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32
+     * @param[out] output          Destination tensor. Data types supported: same as @p input
+     * @param[in]  beta            (Optional) A scaling factor for the exponent. Defaults to 1.f
+     * @param[in]  axis            (Optional) Reduction axis. It has the purpose of squashing the first @p axis
+     *                    dimensions together. For instance, given a [4x4x4x4] image,
+     *                    when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer
      *
      * @param[in] input  Source tensor. Data types supported: QASYMM8/F16/F32
@@ -97,6 +108,21 @@ private:
      *                    when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
      */
     void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t axis);
+    /** Utility method to configure the kernels needed to flatten the input
+     * tensor.
+     *
+     * @note This function changes the internal state of this class. In particular,
+     * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and
+     * @p _output_flat
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           Original source tensor.
+     * @param[in] output          Original destination tensor.
+     * @param[in] axis            (Optional) Reduction axis. It has the purpose of squashing the first @p axis
+     *                    dimensions together. For instance, given a [4x4x4x4] image,
+     *                    when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
+     */
+    void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis);
 
     MemoryGroup                    _memory_group;
     CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel;
index ef9f1641121708e38df064283e54b80e0ae1e06f..b8e2bdc4c6dd96d7f4cbcf961438bb87f378a270 100644 (file)
@@ -63,6 +63,15 @@ public:
      * @param[out] output      Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape     1-D tensor with shape [M]. Data types supported: S32
+     * @param[in]  paddings        2-D tensor with shape [2, M]. Data types supported: S32
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
     /** Set the input and output tensors. (Static block shape and paddings)
      *
      * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -73,6 +82,18 @@ public:
      * @param[out] output        Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
+    /** Set the input and output tensors. (Static block shape and paddings)
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape_x   Block shape x value.
+     * @param[in]  block_shape_y   Block shape y value.
+     * @param[in]  padding_left    The left padding of the output tensor.
+     * @param[in]  padding_right   The right padding of the output tensor.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
+                   ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayer
      *
      * @param[in]  input       Tensor input info. Supported tensor rank: 4. Data types supported: All.
index be7937d0e654498b902544966eea1cd7b14e1580..ac011dd9981aebd87b7563d537cebbd69ae47386 100644 (file)
@@ -46,6 +46,14 @@ public:
      * @param[in]  block_shape Block shape value.
      */
     void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     * @param[in]  block_shape     Block shape value.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayer.
      *
      * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: All.
index ebce4f34d0f6d8b90be102dbfffde5655d85254f..9b204458c337be18187aac6bb90ad26554160379 100644 (file)
@@ -56,6 +56,17 @@ public:
      * @param[out] output Output tensor. Data types supported: Same as @p input.
      */
     void configure(const std::vector<ICLTensor *> &input, int axis, ICLTensor *output);
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @note Supported input tensor rank: up to 4
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The vectors containing all the tensors with the same shape to stack. Data types supported: All.
+     * @param[in]  axis            The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
+     *                             Negative values wrap around
+     * @param[out] output          Output tensor. Data types supported: Same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const std::vector<ICLTensor *> &input, int axis, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel
      *
      * @note Supported input tensor rank: up to 4
index 6bde2c0af4a4f8f4b8a0a16685ba27320f6f2aa3..bb2bc962d6d6e0636049245c34a213f5d86c8d02 100644 (file)
@@ -52,6 +52,24 @@ public:
     void configure(const ICLTensor *input, ICLTensor *output,
                    const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
                    int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data type supported: All.
+     * @param[out] output           Destination tensor. Data type supported: Same as @p input
+     * @param[in]  starts           The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  ends             The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  strides          The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  begin_mask       (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+     * @param[in]  end_mask         (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+     * @param[in]  shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+     *                              A slice of size 1 starting from starts[i] in the dimension must be preserved.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+                   const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+                   int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSlice
      *
index c1b7b943a0e961c2adcefe08aeba9be3b6091774..1c11f076a32914179caa7d46b016e2efa30fc3a6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,14 @@ public:
      * @param[out] output Output tensor. Data types supported: U8 and S16
      */
     void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First tensor input. Data types supported: U8 and S16
+     * @param[in]  lut             Input lookup table. Data types supported: U8 and S16
+     * @param[out] output          Output tensor. Data types supported: U8 and S16
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
 };
 }
 #endif /*ARM_COMPUTE_CLTABLELOOKUP_H */
index a19b320b3ec01416191ad89925aaf78378b62100..d8ae6fbb3464555b46cc01b5288a93564a0771b0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,20 @@ public:
     void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
                    uint8_t false_value = 0, uint8_t true_value = 0,
                    ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
+    /** Initialise the function's source, destination, thresholds and threshold type
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First tensor input. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     * @param[in]  threshold       Threshold. If upper threshold is specified, this will be used as the lower threshold.
+     * @param[in]  false_value     Value to assign when the condition is false.
+     * @param[in]  true_value      Value to assign when the condition is true.
+     * @param[in]  type            Thresholding type. Can either be BINARY or RANGE.
+     * @param[in]  upper           Upper threshold. Only used with RANGE thresholding.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold,
+                   uint8_t false_value = 0, uint8_t true_value = 0,
+                   ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLTHRESHOLD_H */
index 9c83b0cacebf86074978eef0c81d52a567de433f..0dad9ad89d220a5f6052dd0ad866e14fa7021009 100644 (file)
@@ -43,6 +43,14 @@ public:
      * @param[out] output    Destination tensor. Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
+    /** Set the source, destination of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: All.
+     * @param[in]  multiples       Contains the number of times the input tensor should be replicated on the given dimension.
+     * @param[out] output          Destination tensor. Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
     /** Static function to check if given info will lead to a valid configuration of @ref CLTile
      *
      * @param[in] input     Source tensor info. Data type supported: All.
index 61092a19148aff15b596f58e6cc2d9b68f426069..b2fdcda5c4a3cb324e74b0ea39729b37f0b90c92 100644 (file)
@@ -44,6 +44,13 @@ public:
      * @param[out] output Output tensor. Data type supported: Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLTranspose
      *
      * @param[in] input  The input tensor. Data types supported: All.
index 814d07384c4a370fe1cb34a039ca91d14aea3cda..777da692be128f10114513f08a02c12104b7b480 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,16 @@ public:
      *
      */
     void configure(const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis);
+    /** Set the input, output and unstacking axis.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in,out] output_vector   A vector of tensors. Data types supported: Same as @p input.
+     *                                Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
+     * @param[in]     axis            The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis);
     /** Static function to check if given info will lead to a valid configuration of @ref CLUnstack
      *
      * @param[in] input         Input tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
index 1695fd7d2bc66c7045be4aaea1bfcdc00451674f..5f4f57f82479cf9a72f50ceb3b63b6234b784bdc 100644 (file)
@@ -60,6 +60,16 @@ public:
      */
     void configure(ICLTensor *input, ICLTensor *output,
                    const Size2D &info, const InterpolationPolicy upsampling_policy);
+    /** Initialize the function's source, destination, interpolation type and border_mode.
+     *
+     * @param[in]  compile_context   The compile context to be used.
+     * @param[in]  input             Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output            Destination tensor. Data types supported: same as @p input.
+     * @param[in]  info              Contains stride information described in @ref Size2D.
+     * @param[in]  upsampling_policy Defines the policy to fill the intermediate pixels.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output,
+                   const Size2D &info, const InterpolationPolicy upsampling_policy);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
      *
      * @param[in] input             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
index 2de7107f13e20e367fd8b27530a798464b4ccca4..1a2fe9d4d5bff42317df8cc918b31188429212f1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,6 +48,19 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     matrix                The affine matrix. Must be 2x3 of type float.
+     *                                      The matrix argument requires 9 values, the last 3 values are ignored.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLWARPAFFINE_H */
index 93fcc85a950f960138ce6f92100353dd498d2b77..5db9ec4cf0b87f18355b624c4b93822f09e02960 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,6 +47,18 @@ public:
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     matrix                The perspective matrix. Must be 3x3 of type float.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLWARPPERSPECTIVE_H */
index 7ac59c900cc1e2f12d8e721a8f0ba89f0a9c6b72..c1de5f15cea7b63dbf63b1b33c0a594c368d3b0d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -75,6 +75,26 @@ public:
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                    const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+    /** Set the input and output tensors.
+     *
+     * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
+     * @note  Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                              while every optional dimension from 4 and above represent a batch of inputs.
+     *                              Data types supported: F16/F32.
+     * @param[in]  weights          Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+     * @param[in]  biases           Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
+     * @param[out] output           Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                              Data types supported: Same as @p input.
+     * @param[in]  conv_info        Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
+     * @param[in]  enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+     *                              available which may introduce a drop of accuracy as well. Default is false
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradConvolutionLayer
      *
      * @note: This function only works with 3x3,3x1,1x3,5x5,5x1 and 1x5 kernels along with unit strides for both NCHW and NHWC data layout
index e1ab928cf2fc9e8e59d9b2f098a8ba8aea273fa0..11a402e51d5e78fd2197ba6b01ae4d666fabb7ef 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,25 @@ public:
      * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      */
     void configure(ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+    /** Set the input and output tensors.
+     *
+     * @note Winograd input transform supports the following configurations for NCWH data layout
+     *       F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+     *                                   F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * @note Winograd input transform supports the following configurations for NHWC data layout
+     *       F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     *       Strides: only unit strides
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           The input tensor to transform. Data types supported: F16/F32
+     * @param[in] output          The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in] winograd_info   Contains Winograd's information described in @ref WinogradInfo.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransform.
      *
      * @note Winograd input transform supports the following configurations for NCWH data layout
index 95c684b2c32844cb5b2f965aea686ff55920ff28..e70d84b97e84e47d0a781de1ac71d0b4749af737 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,18 @@ public:
      * @param[in]      num_classes Number of classes to activate (must be submultiple of @p input channels)
      */
     void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+     *                                 of the activation function. Data types supported: F16/F32.
+     * @param[out]     output          Destination tensor. Data type supported: same as @p input
+     * @param[in]      act_info        Activation layer parameters.
+     * @param[in]      num_classes     Number of classes to activate (must be submultiple of @p input channels)
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
     /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayer
      *
      * @param[in] input       Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
index cf77c807cf54e834973d034e55b254d792ede782..492c54e4d30ca30d1d1aa1771dea615637a63397 100644 (file)
 using namespace arm_compute;
 
 void CLAbsoluteDifference::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+void CLAbsoluteDifference::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLAbsoluteDifferenceKernel>();
-    k->configure(input1, input2, output);
+    k->configure(compile_context, input1, input2, output);
     _kernel = std::move(k);
 }
index d529d9402a7b4cb7535bd7402b64e2b660edd550..a81d1d042b9b4ca3068a8a9afb28ef518478d86c 100644 (file)
 using namespace arm_compute;
 
 void CLAccumulate::configure(const ICLTensor *input, ICLTensor *accum)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, accum);
+}
+
+void CLAccumulate::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLAccumulateKernel>();
-    k->configure(input, accum);
+    k->configure(compile_context, input, accum);
     _kernel = std::move(k);
 }
 
 void CLAccumulateWeighted::configure(const ICLTensor *input, float alpha, ICLTensor *accum)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, alpha, accum);
+}
+
+void CLAccumulateWeighted::configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLAccumulateWeightedKernel>();
-    k->configure(input, alpha, accum);
+    k->configure(compile_context, input, alpha, accum);
     _kernel = std::move(k);
 }
 
 void CLAccumulateSquared::configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, shift, accum);
+}
+
+void CLAccumulateSquared::configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLAccumulateSquaredKernel>();
-    k->configure(input, shift, accum);
+    k->configure(compile_context, input, shift, accum);
     _kernel = std::move(k);
 }
index 988214574111fd6c86ac6bf8ab9d231d38112a14..989603a9dfc97da576874201409a94ece353c4a4 100644 (file)
@@ -36,9 +36,14 @@ CLActivationLayer::CLActivationLayer(CLRuntimeContext *ctx)
 }
 
 void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info);
+}
+
+void CLActivationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerKernel>();
-    k->configure(input, output, act_info);
+    k->configure(compile_context, input, output, act_info);
     _kernel = std::move(k);
 }
 
index 4ac6d25d75e9885c4ad53b4183818e0ee05e76d8..5b4c694f3300a0f8ca8e8c6e8b9b76899775f013 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -105,6 +105,11 @@ Status CLArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITen
 }
 
 void CLArgMinMaxLayer::configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, axis, output, op);
+}
+
+void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     _num_of_stages  = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
@@ -121,7 +126,7 @@ void CLArgMinMaxLayer::configure(const ICLTensor *input, int axis, ICLTensor *ou
     // Create temporary tensors
     if(_num_of_stages == 1)
     {
-        _reduction_kernels_vector[0].configure(input, nullptr, &_not_reshaped_output, axis, op);
+        _reduction_kernels_vector[0].configure(compile_context, input, nullptr, &_not_reshaped_output, axis, op);
     }
     else
     {
@@ -135,22 +140,22 @@ void CLArgMinMaxLayer::configure(const ICLTensor *input, int axis, ICLTensor *ou
 
         // Apply ReductionOperation only on first kernel
         _memory_group.manage(&_results_vector[0]);
-        _reduction_kernels_vector[0].configure(input, nullptr, &_results_vector[0], axis, op);
+        _reduction_kernels_vector[0].configure(compile_context, input, nullptr, &_results_vector[0], axis, op);
 
         // Apply ReductionOperation on intermediate stages
         for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
         {
             _memory_group.manage(&_results_vector[i]);
-            _reduction_kernels_vector[i].configure(input, &_results_vector[i - 1], &_results_vector[i], axis, op);
+            _reduction_kernels_vector[i].configure(compile_context, input, &_results_vector[i - 1], &_results_vector[i], axis, op);
             _results_vector[i - 1].allocator()->allocate();
         }
 
         // Apply ReductionOperation on the last stage
         const unsigned int last_stage = _num_of_stages - 1;
-        _reduction_kernels_vector[last_stage].configure(input, &_results_vector[last_stage - 1], &_not_reshaped_output, axis, op);
+        _reduction_kernels_vector[last_stage].configure(compile_context, input, &_results_vector[last_stage - 1], &_not_reshaped_output, axis, op);
         _results_vector[last_stage - 1].allocator()->allocate();
     }
-    _reshape_kernel.configure(&_not_reshaped_output, output);
+    _reshape_kernel.configure(compile_context, &_not_reshaped_output, output);
     _not_reshaped_output.allocator()->allocate();
 }
 
index f87ea6ea061ef6647cdedf739549a25ee071e034..9fc51136b300ea2bf6b147ba37c512207023aaf8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,7 +40,14 @@ CLBatchNormalizationLayer::CLBatchNormalizationLayer()
 void CLBatchNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon,
                                           ActivationLayerInfo act_info)
 {
-    _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon, act_info);
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, var, beta, gamma, epsilon, act_info);
+}
+
+void CLBatchNormalizationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta,
+                                          const ICLTensor *gamma, float epsilon,
+                                          ActivationLayerInfo act_info)
+{
+    _norm_kernel.configure(compile_context, input, output, mean, var, beta, gamma, epsilon, act_info);
 }
 
 Status CLBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output,
index 7919b131c877b4eed99b625aedf4575b3c431fea..0a2ae2a6e05f832daca93b895c71a398f562c66e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,12 +39,22 @@ CLBatchToSpaceLayer::CLBatchToSpaceLayer()
 
 void CLBatchToSpaceLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
 {
-    _batch_to_space_kernel.configure(input, block_shape, output);
+    configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, output);
+}
+
+void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
+{
+    _batch_to_space_kernel.configure(compile_context, input, block_shape, output);
 }
 
 void CLBatchToSpaceLayer::configure(const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output)
 {
-    _batch_to_space_kernel.configure(input, block_shape_x, block_shape_y, output);
+    configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, output);
+}
+
+void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output)
+{
+    _batch_to_space_kernel.configure(compile_context, input, block_shape_x, block_shape_y, output);
 }
 
 Status CLBatchToSpaceLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output)
index 55ee78cd28b65838e449bc60bc87029a111e7b99..1fa80f0a249de8c78613f8ba145c4b313652b01d 100644 (file)
 using namespace arm_compute;
 
 void CLBitwiseAnd::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+void CLBitwiseAnd::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLBitwiseAndKernel>();
-    k->configure(input1, input2, output);
+    k->configure(compile_context, input1, input2, output);
     _kernel = std::move(k);
 }
index f20363169be9012d30e7718fce3f67e883c084ac..46595191a0d75af5ec68ba542681a49cfd0bdf9b 100644 (file)
 using namespace arm_compute;
 
 void CLBitwiseNot::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLBitwiseNot::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLBitwiseNotKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
index f1d07df1e72bfa2cec9d5f95be2a615647f769e9..8431140cb83238052e99ddd5427e6f588d85f87d 100644 (file)
 using namespace arm_compute;
 
 void CLBitwiseOr::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+void CLBitwiseOr::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLBitwiseOrKernel>();
-    k->configure(input1, input2, output);
+    k->configure(compile_context, input1, input2, output);
     _kernel = std::move(k);
 }
index e5d3c7a292a9a6309d779a70867a1148eec9d899..0e0e7f2028a22c9bc3ac3ca40911d2bfa9f9114c 100644 (file)
 using namespace arm_compute;
 
 void CLBitwiseXor::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+void CLBitwiseXor::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLBitwiseXorKernel>();
-    k->configure(input1, input2, output);
+    k->configure(compile_context, input1, input2, output);
     _kernel = std::move(k);
 }
index 8669da636ec5a6da15ecf76e78b98f2c22543d93..55bcde749ca0bb4eecdbc5dfda681724af952464 100644 (file)
 namespace arm_compute
 {
 void CLBoundingBoxTransform::configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), boxes, pred_boxes, deltas, info);
+}
+
+void CLBoundingBoxTransform::configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info)
 {
     // Configure Bounding Box kernel
     auto k = arm_compute::support::cpp14::make_unique<CLBoundingBoxTransformKernel>();
-    k->configure(boxes, pred_boxes, deltas, info);
+    k->configure(compile_context, boxes, pred_boxes, deltas, info);
     _kernel = std::move(k);
 }
 
index d288e42d9b5493fb592f18fa47735e83489beda0..72c822197c68e7a626ea19152417fdde5e60286b 100644 (file)
 using namespace arm_compute;
 
 void CLBox3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
+}
+
+void CLBox3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLBox3x3Kernel>();
-    k->configure(input, output, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
 }
index 28abaa284b80dede0a7b427d92809f4064c712e3..0c8d3532aac2bc534838f187ede7916d7737cb05 100644 (file)
@@ -57,6 +57,13 @@ CLCannyEdge::CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLI
 
 void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
                             uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, upper_thr, lower_thr, gradient_size, norm_type, border_mode, constant_border_value);
+}
+
+void CLCannyEdge::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type,
+                            BorderMode border_mode,
+                            uint8_t    constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
@@ -111,19 +118,19 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t
     if(gradient_size == 3)
     {
         auto k = arm_compute::support::cpp14::make_unique<CLSobel3x3>();
-        k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+        k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
         _sobel = std::move(k);
     }
     else if(gradient_size == 5)
     {
         auto k = arm_compute::support::cpp14::make_unique<CLSobel5x5>();
-        k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+        k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
         _sobel = std::move(k);
     }
     else if(gradient_size == 7)
     {
         auto k = arm_compute::support::cpp14::make_unique<CLSobel7x7>();
-        k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+        k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
         _sobel = std::move(k);
     }
     else
@@ -136,7 +143,7 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t
     _memory_group.manage(&_phase);
 
     // Configure gradient
-    _gradient.configure(&_gx, &_gy, &_mag, &_phase, norm_type);
+    _gradient.configure(compile_context, &_gx, &_gy, &_mag, &_phase, norm_type);
 
     // Allocate intermediate buffers
     _gx.allocator()->allocate();
@@ -146,14 +153,14 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t
     _memory_group.manage(&_nonmax);
 
     // Configure non-maxima suppression
-    _non_max_suppr.configure(&_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED);
+    _non_max_suppr.configure(compile_context, &_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED);
 
     // Allocate intermediate buffers
     _phase.allocator()->allocate();
 
     // Fill border around magnitude image as non-maxima suppression will access
     // it. If border mode is undefined filling the border is a nop.
-    _border_mag_gradient.configure(&_mag, _non_max_suppr.border_size(), border_mode, constant_border_value);
+    _border_mag_gradient.configure(compile_context, &_mag, _non_max_suppr.border_size(), border_mode, constant_border_value);
 
     // Allocate intermediate buffers
     _mag.allocator()->allocate();
@@ -165,7 +172,7 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t
     _memory_group.manage(&_l1_list_counter);
 
     // Configure edge tracing
-    _edge_trace.configure(&_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter);
+    _edge_trace.configure(compile_context, &_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter);
 
     // Allocate intermediate buffers
     _visited.allocator()->allocate();
index e3813b300854de0e181a518c165795135f704275..7048a79bc5c934b9ec27b790e07872794778fad9 100644 (file)
 namespace arm_compute
 {
 void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, policy);
+}
+
+void CLCast::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
-    k->configure(input, output, policy, 0);
+    k->configure(compile_context, input, output, policy, 0);
     _kernel = std::move(k);
 }
 
index c7f2748b0b90cbf6f0acb887ab90f6bae7c8fc30..249212e03b9bfcc776e1e7e6a46b4bc5caf31752 100644 (file)
 using namespace arm_compute;
 
 void CLChannelCombine::configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, plane3, output);
+}
+
+void CLChannelCombine::configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLChannelCombineKernel>();
-    k->configure(plane0, plane1, plane2, plane3, output);
+    k->configure(compile_context, plane0, plane1, plane2, plane3, output);
     _kernel = std::move(k);
 }
 
 void CLChannelCombine::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, output);
+}
+
+void CLChannelCombine::configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLChannelCombineKernel>();
-    k->configure(plane0, plane1, plane2, output);
+    k->configure(compile_context, plane0, plane1, plane2, output);
     _kernel = std::move(k);
 }
index 810103994d2bd31fa432166d206545a3f1a05db5..019e0a7a90020fe5c555cee3cee2b675973f0860 100644 (file)
 using namespace arm_compute;
 
 void CLChannelExtract::configure(const ICLTensor *input, Channel channel, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, channel, output);
+}
+
+void CLChannelExtract::configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLChannelExtractKernel>();
-    k->configure(input, channel, output);
+    k->configure(compile_context, input, channel, output);
     _kernel = std::move(k);
 }
 
 void CLChannelExtract::configure(const ICLMultiImage *input, Channel channel, ICLImage *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, channel, output);
+}
+
+void CLChannelExtract::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLChannelExtractKernel>();
-    k->configure(input, channel, output);
+    k->configure(compile_context, input, channel, output);
     _kernel = std::move(k);
 }
index 2e1192a725e33f9d1fc7ee85819c011d54a895bb..93ab7c7ddfb145422c85e1bd254db20af2eb76e7 100644 (file)
 namespace arm_compute
 {
 void CLChannelShuffleLayer::configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, num_groups);
+}
+
+void CLChannelShuffleLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLChannelShuffleLayerKernel>();
-    k->configure(input, output, num_groups);
+    k->configure(compile_context, input, output, num_groups);
     _kernel = std::move(k);
 }
 
index 49aacea78876a552563b6bb33dc4578141a8a6b4..b8e597751b714f5711cbd5ad4877b44af8368c08 100644 (file)
 using namespace arm_compute;
 
 void CLColorConvert::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLColorConvert::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLColorConvertKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
 
 void CLColorConvert::configure(const ICLImage *input, ICLMultiImage *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLColorConvert::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLColorConvertKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
 
 void CLColorConvert::configure(const ICLMultiImage *input, ICLImage *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLColorConvert::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLColorConvertKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
 
 void CLColorConvert::configure(const ICLMultiImage *input, ICLMultiImage *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLColorConvert::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLColorConvertKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
index ac56d5782cc9ceffbe03dbb581b43f386acc0bea..8d5ec3571d42ddfde22d0ad09d1889a600b731ff 100644 (file)
 namespace arm_compute
 {
 void CLComparison::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, operation);
+}
+
+void CLComparison::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLComparisonKernel>();
-    k->configure(input1, input2, output, operation);
+    k->configure(compile_context, input1, input2, output, operation);
     _kernel = std::move(k);
 
     if(output->info()->dimension(0) > 1)
@@ -42,7 +47,7 @@ void CLComparison::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *ou
 
         if(broadcasted_info->info()->dimension(0) == 1)
         {
-            _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+            _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
         }
     }
 }
@@ -54,9 +59,15 @@ Status CLComparison::validate(const ITensorInfo *input1, const ITensorInfo *inpu
 
 template <ComparisonOperation COP>
 void CLComparisonStatic<COP>::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+template <ComparisonOperation COP>
+void CLComparisonStatic<COP>::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLComparisonKernel>();
-    k->configure(input1, input2, output, COP);
+    k->configure(compile_context, input1, input2, output, COP);
     _kernel = std::move(k);
 
     if(output->info()->dimension(0) > 1)
@@ -65,7 +76,7 @@ void CLComparisonStatic<COP>::configure(ICLTensor *input1, ICLTensor *input2, IC
 
         if(broadcasted_info->info()->dimension(0) == 1)
         {
-            _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+            _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
         }
     }
 }
index b71e89c80433e478ace917fa0028638c8b7dfe39..62714fed5c1f856b0895bc0e8a42ddd49fbdcacb 100644 (file)
 namespace arm_compute
 {
 void CLComputeAllAnchors::configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), anchors, all_anchors, info);
+}
+
+void CLComputeAllAnchors::configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info)
 {
     // Configure ComputeAllAnchors kernel
     auto k = arm_compute::support::cpp14::make_unique<CLComputeAllAnchorsKernel>();
-    k->configure(anchors, all_anchors, info);
+    k->configure(compile_context, anchors, all_anchors, info);
     _kernel = std::move(k);
 }
 
index c85450b2f669f8add0bafeb7cee1a4a31d798ad4..e97256713f0ef6e9f34b469ba1e1f693c66a672e 100644 (file)
@@ -49,12 +49,22 @@ CLConcatenateLayer::CLConcatenateLayer()
 
 void CLConcatenateLayer::configure(std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
 {
-    configure_internal(std::move(inputs_vector), output, axis);
+    configure(CLKernelLibrary::get().get_compile_context(), inputs_vector, output, axis);
+}
+
+void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
+{
+    configure_internal(compile_context, std::move(inputs_vector), output, axis);
 }
 
 void CLConcatenateLayer::configure(std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
 {
-    configure_internal(std::move(inputs_vector), output, axis);
+    configure(CLKernelLibrary::get().get_compile_context(), inputs_vector, output, axis);
+}
+
+void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
+{
+    configure_internal(compile_context, std::move(inputs_vector), output, axis);
 }
 
 Status CLConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
@@ -68,7 +78,7 @@ Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inpu
 }
 
 template <typename TensorType>
-void CLConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis)
+void CLConcatenateLayer::configure_internal(const CLCompileContext &compile_context, std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis)
 {
     ARM_COMPUTE_ERROR_ON(output == nullptr);
     _axis       = axis;
@@ -97,7 +107,7 @@ void CLConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_v
                 {
                     // Configure WidthConcatenate2Tensors kernel
                     auto kernel = support::cpp14::make_unique<CLWidthConcatenate2TensorsKernel>();
-                    kernel->configure(inputs_vector.at(0), inputs_vector.at(1), output);
+                    kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), output);
                     _concat_kernels.emplace_back(std::move(kernel));
                     break;
                 }
@@ -105,7 +115,7 @@ void CLConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_v
                 {
                     // Configure WidthConcatenate4Tensors kernel
                     auto kernel = support::cpp14::make_unique<CLWidthConcatenate4TensorsKernel>();
-                    kernel->configure(inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output);
+                    kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output);
                     _concat_kernels.emplace_back(std::move(kernel));
                     break;
                 }
@@ -115,7 +125,7 @@ void CLConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_v
                     for(unsigned int i = 0; i < _num_inputs; ++i)
                     {
                         auto kernel = support::cpp14::make_unique<CLWidthConcatenateLayerKernel>();
-                        kernel->configure(inputs_vector.at(i), offset, output);
+                        kernel->configure(compile_context, inputs_vector.at(i), offset, output);
                         offset += inputs_vector.at(i)->info()->dimension(_axis);
                         _concat_kernels.emplace_back(std::move(kernel));
                     }
@@ -129,7 +139,7 @@ void CLConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_v
             for(unsigned int i = 0; i < _num_inputs; ++i)
             {
                 auto kernel = support::cpp14::make_unique<CLHeightConcatenateLayerKernel>();
-                kernel->configure(inputs_vector.at(i), offset, output);
+                kernel->configure(compile_context, inputs_vector.at(i), offset, output);
                 offset += inputs_vector.at(i)->info()->dimension(_axis);
                 _concat_kernels.emplace_back(std::move(kernel));
             }
@@ -140,7 +150,7 @@ void CLConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_v
             for(unsigned int i = 0; i < _num_inputs; ++i)
             {
                 auto kernel = support::cpp14::make_unique<CLDepthConcatenateLayerKernel>();
-                kernel->configure(inputs_vector.at(i), offset, output);
+                kernel->configure(compile_context, inputs_vector.at(i), offset, output);
                 offset += inputs_vector.at(i)->info()->dimension(_axis);
                 _concat_kernels.emplace_back(std::move(kernel));
             }
@@ -151,7 +161,7 @@ void CLConcatenateLayer::configure_internal(std::vector<TensorType *> &&inputs_v
             for(unsigned int i = 0; i < _num_inputs; ++i)
             {
                 auto kernel = support::cpp14::make_unique<CLBatchConcatenateLayerKernel>();
-                kernel->configure(inputs_vector.at(i), offset, output);
+                kernel->configure(compile_context, inputs_vector.at(i), offset, output);
                 offset += inputs_vector.at(i)->info()->dimension(_axis);
                 _concat_kernels.emplace_back(std::move(kernel));
             }
index 02927e83ad82a2e07eb27d357502a98740d8ed77..68c0fb6ebfdc5ef20bbaec65c25713e6a3838b8d 100644 (file)
@@ -27,9 +27,15 @@ namespace arm_compute
 {
 void CLConvertFullyConnectedWeights::configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape,
                                                DataLayout data_layout)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, original_input_shape, data_layout);
+}
+
+void CLConvertFullyConnectedWeights::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape,
+                                               DataLayout data_layout)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLConvertFullyConnectedWeightsKernel>();
-    k->configure(input, output, original_input_shape, data_layout);
+    k->configure(compile_context, input, output, original_input_shape, data_layout);
     _kernel = std::move(k);
 }
 
index b86a08e9574456c31198ff17a1527ff2adefefa7..2b0d7d5e53ac69496784650539637c863f0c1b31 100644 (file)
 using namespace arm_compute;
 
 void CLConvolution3x3::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_mode, constant_border_value);
+}
+
+void CLConvolution3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
+                                 uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLConvolution3x3Kernel>();
-    k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
 
 template <unsigned int matrix_size>
@@ -55,6 +61,13 @@ CLConvolutionSquare<matrix_size>::CLConvolutionSquare(std::shared_ptr<IMemoryMan
 template <unsigned int matrix_size>
 void CLConvolutionSquare<matrix_size>::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
                                                  uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_mode, constant_border_value);
+}
+
+template <unsigned int matrix_size>
+void CLConvolutionSquare<matrix_size>::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
+                                                 uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON(conv == nullptr);
@@ -75,17 +88,17 @@ void CLConvolutionSquare<matrix_size>::configure(ICLTensor *input, ICLTensor *ou
             scale = calculate_matrix_scale(conv, matrix_size);
         }
 
-        _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
-        _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second);
-        _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+        _kernel_hor.configure(compile_context, input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
+        _kernel_vert.configure(compile_context, &_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second);
+        _border_handler.configure(compile_context, input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
 
         // Allocate intermediate buffer
         _tmp.allocator()->allocate();
     }
     else
     {
-        _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
-        _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
+        _kernel.configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
+        _border_handler.configure(compile_context, input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
     }
 }
 
@@ -112,9 +125,15 @@ template class arm_compute::CLConvolutionSquare<7>;
 template class arm_compute::CLConvolutionSquare<9>;
 
 void CLConvolutionRectangle::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, rows, cols, scale, border_mode, constant_border_value);
+}
+
+void CLConvolutionRectangle::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale,
+                                       BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLConvolutionRectangleKernel>();
-    k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
index c271f502e90bfdd5a68b5ece9523ac976b7bb4d7..b6e1413f7aaca8d9019a2861db20d050d3ebafde 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,13 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_ma
 
 void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
                                    const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups);
+}
+
+void CLConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                                   const WeightsInfo &weights_info,
+                                   const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info,
@@ -57,7 +64,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
         {
             ARM_COMPUTE_ERROR_ON(num_groups != 1);
             auto f = arm_compute::support::cpp14::make_unique<CLWinogradConvolutionLayer>(_memory_manager);
-            f->configure(input, weights, biases, output, conv_info, act_info, enable_fast_math);
+            f->configure(compile_context, input, weights, biases, output, conv_info, act_info, enable_fast_math);
             _function = std::move(f);
             break;
         }
@@ -65,21 +72,21 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
         {
             ARM_COMPUTE_ERROR_ON(num_groups != 1);
             auto f = arm_compute::support::cpp14::make_unique<CLDirectConvolutionLayer>();
-            f->configure(input, weights, biases, output, conv_info, act_info);
+            f->configure(compile_context, input, weights, biases, output, conv_info, act_info);
             _function = std::move(f);
             break;
         }
         case ConvolutionMethod::GEMM:
         {
             auto f = arm_compute::support::cpp14::make_unique<CLGEMMConvolutionLayer>(_memory_manager);
-            f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
+            f->configure(compile_context, input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
             _function = std::move(f);
             break;
         }
         case ConvolutionMethod::FFT:
         {
             auto f = arm_compute::support::cpp14::make_unique<CLFFTConvolutionLayer>(_memory_manager);
-            f->configure(input, weights, biases, output, conv_info, act_info);
+            f->configure(compile_context, input, weights, biases, output, conv_info, act_info);
             _function = std::move(f);
             break;
         }
index 3692fda6b2b39ec8245f367614f890d0d8e03c71..4c5d62a82cc565a50b1080266866144e64b55c9a 100644 (file)
 using namespace arm_compute;
 
 void CLCopy::configure(ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLCopy::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLCopyKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
 
index 5e1278df5be1326697d14c3720d20b212f2a7e7a..17fc80e146b364fe3c86b69b2875d66cdc8afb3b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "arm_compute/runtime/CL/functions/CLCropResize.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
-
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/functions/CLCropResize.h"
 
 #include <cstddef>
 
@@ -51,120 +50,10 @@ inline void configure_crop(const ICLTensor *input, ICLTensor *crop_boxes, ICLTen
     const TensorShape out_shape(input->info()->tensor_shape()[0], static_cast<uint32_t>(abs(end[0] - start[0])) + 1, static_cast<uint32_t>(abs(end[1] - start[1])) + 1);
     output->info()->set_tensor_shape(out_shape);
 }
-
-inline void run_crop(const ICLTensor *input, ICLTensor *output, uint32_t batch_index, Coordinates start, Coordinates end, float extrapolation_value)
-{
-    bool is_width_flipped  = end[0] < start[0];
-    bool is_height_flipped = end[1] < start[1];
-    /** The number of rows out of bounds at the start and end of output. */
-    std::array<int32_t, 2> rows_out_of_bounds{ 0 };
-    /** The number of columns out of bounds at the start and end of output. */
-    std::array<int32_t, 2> cols_out_of_bounds{ 0 };
-    if(is_height_flipped)
-    {
-        rows_out_of_bounds[0] = start[1] >= static_cast<int32_t>(input->info()->dimension(2)) ? std::min(start[1] - input->info()->dimension(2) + 1, output->info()->dimension(2)) : 0;
-        rows_out_of_bounds[1] = end[1] < 0 ? std::min(-end[1], static_cast<int32_t>(output->info()->dimension(2))) : 0;
-    }
-    else
-    {
-        rows_out_of_bounds[0] = start[1] < 0 ? std::min(-start[1], static_cast<int32_t>(output->info()->dimension(2))) : 0;
-        rows_out_of_bounds[1] = end[1] >= static_cast<int32_t>(input->info()->dimension(2)) ? std::min(end[1] - input->info()->dimension(2) + 1, output->info()->dimension(2)) : 0;
-    }
-    if(is_width_flipped)
-    {
-        cols_out_of_bounds[0] = start[0] >= static_cast<int32_t>(input->info()->dimension(1)) ? std::min(start[0] - input->info()->dimension(1) + 1, output->info()->dimension(1)) : 0;
-        cols_out_of_bounds[1] = end[0] < 0 ? std::min(-end[0], static_cast<int32_t>(output->info()->dimension(1))) : 0;
-    }
-    else
-    {
-        cols_out_of_bounds[0] = start[0] < 0 ? std::min(-start[0], static_cast<int32_t>(output->info()->dimension(1))) : 0;
-        cols_out_of_bounds[1] = end[0] >= static_cast<int32_t>(input->info()->dimension(1)) ? std::min(end[0] - input->info()->dimension(1) + 1, output->info()->dimension(1)) : 0;
-    }
-
-    Window full_window = calculate_max_window(*output->info());
-
-    //  Full output window:
-    //  --------------------------------
-    //  |          Out of bounds       |
-    //  |          rows before         |
-    //  |------------------------------|
-    //  | Out of | In         | Out of |
-    //  | bounds | bounds     | bounds |
-    //  | cols   | elements   | cols   |
-    //  | before | copied     | after  |
-    //  |        | from input |        |
-    //  |------------------------------|
-    //  |        Out of bounds         |
-    //  |        rows after            |
-    //  |------------------------------|
-    // Use a separate output window for each section of the full output window.
-    // Fill all output rows that have no elements that are within the input bounds
-    // with the extrapolation value using memset.
-    // First for the rows before the in bounds rows.
-    if(rows_out_of_bounds[0] > 0)
-    {
-        Window slice_fill_rows_before(full_window);
-        slice_fill_rows_before.set(2, Window::Dimension(0, rows_out_of_bounds[0], 1));
-        auto kernel = arm_compute::support::cpp14::make_unique<CLMemsetKernel>();
-        kernel->configure(output, extrapolation_value, &slice_fill_rows_before);
-        CLScheduler::get().enqueue(*kernel);
-    }
-
-    Window slice_in(full_window);
-    slice_in.set(2, Window::Dimension(rows_out_of_bounds[0], output->info()->dimension(2) - rows_out_of_bounds[1], 1));
-    slice_in.set(1, Window::Dimension(cols_out_of_bounds[0], output->info()->dimension(1) - cols_out_of_bounds[1], 1));
-
-    int rows_in_bounds = static_cast<int32_t>(output->info()->dimension(2)) - rows_out_of_bounds[0] - rows_out_of_bounds[1];
-    if(rows_in_bounds > 0)
-    {
-        // Fill all elements that share a row with an in bounds element with the extrapolation value.
-        if(cols_out_of_bounds[0] > 0)
-        {
-            Window slice_fill_cols_before(slice_in);
-            slice_fill_cols_before.set(1, Window::Dimension(0, cols_out_of_bounds[0], 1));
-            auto kernel = arm_compute::support::cpp14::make_unique<CLMemsetKernel>();
-            kernel->configure(output, extrapolation_value, &slice_fill_cols_before);
-            CLScheduler::get().enqueue(*kernel);
-        }
-
-        if(cols_out_of_bounds[1] > 0)
-        {
-            Window slice_fill_cols_after(slice_in);
-            slice_fill_cols_after.set(1, Window::Dimension(output->info()->dimension(1) - cols_out_of_bounds[1], output->info()->dimension(1), 1));
-            auto kernel = arm_compute::support::cpp14::make_unique<CLMemsetKernel>();
-            kernel->configure(output, extrapolation_value, &slice_fill_cols_after);
-            CLScheduler::get().enqueue(*kernel);
-        }
-
-        // Copy all elements within the input bounds from the input tensor.
-        int cols_in_bounds = static_cast<int32_t>(output->info()->dimension(1)) - cols_out_of_bounds[0] - cols_out_of_bounds[1];
-        if(cols_in_bounds > 0)
-        {
-            Coordinates2D start_in{ is_width_flipped ? start[0] - cols_out_of_bounds[0] : start[0] + cols_out_of_bounds[0],
-                                    is_height_flipped ? start[1] - rows_out_of_bounds[0] : start[1] + rows_out_of_bounds[0] };
-            Coordinates2D end_in{ is_width_flipped ? start_in.x - cols_in_bounds + 1 : start_in.x + cols_in_bounds - 1,
-                                  is_height_flipped ? start_in.y - rows_in_bounds + 1 : start_in.y + rows_in_bounds - 1 };
-            auto kernel = arm_compute::support::cpp14::make_unique<CLCropKernel>();
-
-            kernel->configure(input, output, start_in, end_in, batch_index, extrapolation_value, &slice_in);
-            CLScheduler::get().enqueue(*kernel);
-        }
-    }
-
-    // Fill all rows after the in bounds elements with the extrapolation value.
-    if(rows_out_of_bounds[1] > 0)
-    {
-        Window slice_fill_rows_after(full_window);
-        slice_fill_rows_after.set(2, Window::Dimension(output->info()->dimension(2) - rows_out_of_bounds[1], output->info()->dimension(2), 1));
-        auto kernel = arm_compute::support::cpp14::make_unique<CLMemsetKernel>();
-        kernel->configure(output, extrapolation_value, &slice_fill_rows_after);
-        CLScheduler::get().enqueue(*kernel);
-    }
-}
 } // namespace
 
 CLCropResize::CLCropResize()
-    : _input(nullptr), _boxes(nullptr), _box_ind(nullptr), _output(nullptr), _num_boxes(0), _method(), _extrapolation_value(0), _scale(), _copy(), _crop_results(), _scaled_results()
+    : _input(nullptr), _boxes(nullptr), _box_ind(nullptr), _output(nullptr), _num_boxes(0), _method(), _extrapolation_value(0), _scale(), _copy(), _crop_results(), _scaled_results(), _internal_kernels()
 {
 }
 
@@ -190,9 +79,18 @@ Status CLCropResize::validate(const ITensorInfo *input, ITensorInfo *boxes, ITen
 void CLCropResize::configure(const ICLTensor *input, ICLTensor *boxes, ICLTensor *box_ind, ICLTensor *output, Coordinates2D crop_size,
                              InterpolationPolicy method, float extrapolation_value)
 {
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    configure(CLKernelLibrary::get().get_compile_context(), input, boxes, box_ind, output, crop_size, method, extrapolation_value);
+}
+
+void CLCropResize::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *boxes, ICLTensor *box_ind, ICLTensor *output, Coordinates2D crop_size,
+                             InterpolationPolicy method, float extrapolation_value)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, boxes, box_ind);
     ARM_COMPUTE_ERROR_THROW_ON(CLCropResize::validate(input->info(), boxes->info(), box_ind->info(), output->info(), crop_size, method, extrapolation_value));
 
+    TensorShape output_shape = TensorShape(input->info()->tensor_shape()[0], crop_size.x, crop_size.y, boxes->info()->tensor_shape()[1]);
+    auto_init_if_empty(*output->info(), output_shape, 1, DataType::F32);
+
     _num_boxes = boxes->info()->tensor_shape()[1];
     TensorShape out_shape(input->info()->tensor_shape()[0], crop_size.x, crop_size.y);
 
@@ -210,7 +108,13 @@ void CLCropResize::configure(const ICLTensor *input, ICLTensor *boxes, ICLTensor
     // - A scale function is used to resize the cropped image to the size specified by crop_size.
     // - A tensor is required to hold the final scaled image before it is copied into the 4D output
     //   that will hold all final cropped and scaled 3D images using CLCopyKernel.
-    for(unsigned int i = 0; i < _num_boxes; ++i)
+
+    // The contents of _boxes and _box_ind are required to calculate the shape
+    // of the initial cropped image and thus are required to configure the
+    // kernels used for cropping and scaling.
+    _boxes->map(CLScheduler::get().queue());
+    _box_ind->map(CLScheduler::get().queue());
+    for(unsigned int num_box = 0; num_box < _num_boxes; ++num_box)
     {
         auto       crop_tensor = support::cpp14::make_unique<CLTensor>();
         TensorInfo crop_result_info(1, DataType::F32);
@@ -223,45 +127,149 @@ void CLCropResize::configure(const ICLTensor *input, ICLTensor *boxes, ICLTensor
         scaled_result_info.set_data_layout(DataLayout::NHWC);
         scale_tensor->allocator()->init(scaled_result_info);
         _scaled_results.emplace_back(std::move(scale_tensor));
-    }
-}
 
-void CLCropResize::run()
-{
-    ARM_COMPUTE_ERROR_ON_MSG(_output == nullptr, "Unconfigured function");
-    // The contents of _boxes and _box_ind are required to calculate the shape
-    // of the initial cropped image and thus are required to configure the
-    // kernels used for cropping and scaling.
-    _boxes->map(CLScheduler::get().queue());
-    _box_ind->map(CLScheduler::get().queue());
-    for(unsigned int i = 0; i < _num_boxes; ++i)
-    {
-        // Size of the crop box in _boxes and thus the shape of _crop_results[i]
-        // may not be known until run-time and so the kernels cannot be configured until then.
+        // Size of the crop box in _boxes has to be given before the configure
         uint32_t    batch_index;
         Coordinates start{};
         Coordinates end{};
-        configure_crop(_input, _boxes, _box_ind, _crop_results[i].get(), i, start, end, batch_index);
+        configure_crop(_input, _boxes, _box_ind, _crop_results[num_box].get(), num_box, start, end, batch_index);
 
         auto scale_kernel = support::cpp14::make_unique<CLScale>();
-        scale_kernel->configure(_crop_results[i].get(), _scaled_results[i].get(), _method, BorderMode::CONSTANT, PixelValue(_extrapolation_value), SamplingPolicy::TOP_LEFT);
+        scale_kernel->configure(compile_context, _crop_results[num_box].get(), _scaled_results[num_box].get(), _method, BorderMode::CONSTANT, PixelValue(_extrapolation_value), SamplingPolicy::TOP_LEFT);
         _scale.emplace_back(std::move(scale_kernel));
 
         Window win = calculate_max_window(*_output->info());
-        win.set(3, Window::Dimension(i, i + 1, 1));
+        win.set(3, Window::Dimension(num_box, num_box + 1, 1));
 
         auto copy_kernel = support::cpp14::make_unique<CLCopyKernel>();
-        copy_kernel->configure(_scaled_results[i].get(), _output, PaddingList(), &win);
+        copy_kernel->configure(compile_context, _scaled_results[num_box].get(), _output, PaddingList(), &win);
         _copy.emplace_back(std::move(copy_kernel));
 
-        _crop_results[i]->allocator()->allocate();
-        _scaled_results[i]->allocator()->allocate();
+        _crop_results[num_box]->allocator()->allocate();
+        _scaled_results[num_box]->allocator()->allocate();
+
+        bool is_width_flipped  = end[0] < start[0];
+        bool is_height_flipped = end[1] < start[1];
+        /** The number of rows out of bounds at the start and end of _crop_results[num_box].get(). */
+        std::array<int32_t, 2> rows_out_of_bounds{ 0 };
+        /** The number of columns out of bounds at the start and end of _crop_results[num_box].get(). */
+        std::array<int32_t, 2> cols_out_of_bounds{ 0 };
+        if(is_height_flipped)
+        {
+            rows_out_of_bounds[0] = start[1] >= static_cast<int32_t>(_input->info()->dimension(2)) ? std::min(start[1] - _input->info()->dimension(2) + 1, _crop_results[num_box].get()->info()->dimension(2)) : 0;
+            rows_out_of_bounds[1] = end[1] < 0 ? std::min(-end[1], static_cast<int32_t>(_crop_results[num_box].get()->info()->dimension(2))) : 0;
+        }
+        else
+        {
+            rows_out_of_bounds[0] = start[1] < 0 ? std::min(-start[1], static_cast<int32_t>(_crop_results[num_box].get()->info()->dimension(2))) : 0;
+            rows_out_of_bounds[1] = end[1] >= static_cast<int32_t>(_input->info()->dimension(2)) ? std::min(end[1] - _input->info()->dimension(2) + 1, _crop_results[num_box].get()->info()->dimension(2)) : 0;
+        }
+        if(is_width_flipped)
+        {
+            cols_out_of_bounds[0] = start[0] >= static_cast<int32_t>(_input->info()->dimension(1)) ? std::min(start[0] - _input->info()->dimension(1) + 1, _crop_results[num_box].get()->info()->dimension(1)) : 0;
+            cols_out_of_bounds[1] = end[0] < 0 ? std::min(-end[0], static_cast<int32_t>(_crop_results[num_box].get()->info()->dimension(1))) : 0;
+        }
+        else
+        {
+            cols_out_of_bounds[0] = start[0] < 0 ? std::min(-start[0], static_cast<int32_t>(_crop_results[num_box].get()->info()->dimension(1))) : 0;
+            cols_out_of_bounds[1] = end[0] >= static_cast<int32_t>(_input->info()->dimension(1)) ? std::min(end[0] - _input->info()->dimension(1) + 1, _crop_results[num_box].get()->info()->dimension(1)) : 0;
+        }
+
+        Window full_window = calculate_max_window(*_crop_results[num_box].get()->info());
+
+        //  Full _crop_results[num_box].get() window:
+        //  --------------------------------
+        //  |          Out of bounds       |
+        //  |          rows before         |
+        //  |------------------------------|
+        //  | Out of | In         | Out of |
+        //  | bounds | bounds     | bounds |
+        //  | cols   | elements   | cols   |
+        //  | before | copied     | after  |
+        //  |        | from input |        |
+        //  |------------------------------|
+        //  |        Out of bounds         |
+        //  |        rows after            |
+        //  |------------------------------|
+        // Use a separate _crop_results[num_box].get() window for each section of the full _crop_results[num_box].get() window.
+        // Fill all _crop_results[num_box].get() rows that have no elements that are within the input bounds
+        // with the extrapolation value using memset.
+        // First for the rows before the in bounds rows.
+        if(rows_out_of_bounds[0] > 0)
+        {
+            Window slice_fill_rows_before(full_window);
+            slice_fill_rows_before.set(2, Window::Dimension(0, rows_out_of_bounds[0], 1));
+            auto kernel = arm_compute::support::cpp14::make_unique<CLMemsetKernel>();
+            kernel->configure(compile_context, _crop_results[num_box].get(), extrapolation_value, &slice_fill_rows_before);
+            _internal_kernels.push_back(std::move(kernel));
+        }
+
+        Window slice_in(full_window);
+        slice_in.set(2, Window::Dimension(rows_out_of_bounds[0], _crop_results[num_box].get()->info()->dimension(2) - rows_out_of_bounds[1], 1));
+        slice_in.set(1, Window::Dimension(cols_out_of_bounds[0], _crop_results[num_box].get()->info()->dimension(1) - cols_out_of_bounds[1], 1));
+
+        int rows_in_bounds = static_cast<int32_t>(_crop_results[num_box].get()->info()->dimension(2)) - rows_out_of_bounds[0] - rows_out_of_bounds[1];
+        if(rows_in_bounds > 0)
+        {
+            // Fill all elements that share a row with an in bounds element with the extrapolation value.
+            if(cols_out_of_bounds[0] > 0)
+            {
+                Window slice_fill_cols_before(slice_in);
+                slice_fill_cols_before.set(1, Window::Dimension(0, cols_out_of_bounds[0], 1));
+                auto kernel = arm_compute::support::cpp14::make_unique<CLMemsetKernel>();
+                kernel->configure(compile_context, _crop_results[num_box].get(), extrapolation_value, &slice_fill_cols_before);
+                _internal_kernels.push_back(std::move(kernel));
+            }
 
-        run_crop(_input, _crop_results[i].get(), batch_index, start, end, _extrapolation_value);
+            if(cols_out_of_bounds[1] > 0)
+            {
+                Window slice_fill_cols_after(slice_in);
+                slice_fill_cols_after.set(1, Window::Dimension(_crop_results[num_box].get()->info()->dimension(1) - cols_out_of_bounds[1], _crop_results[num_box].get()->info()->dimension(1), 1));
+                auto kernel = arm_compute::support::cpp14::make_unique<CLMemsetKernel>();
+                kernel->configure(compile_context, _crop_results[num_box].get(), extrapolation_value, &slice_fill_cols_after);
+                _internal_kernels.push_back(std::move(kernel));
+            }
+
+            // Copy all elements within the input bounds from the input tensor.
+            int cols_in_bounds = static_cast<int32_t>(_crop_results[num_box].get()->info()->dimension(1)) - cols_out_of_bounds[0] - cols_out_of_bounds[1];
+            if(cols_in_bounds > 0)
+            {
+                Coordinates2D start_in{ is_width_flipped ? start[0] - cols_out_of_bounds[0] : start[0] + cols_out_of_bounds[0],
+                                        is_height_flipped ? start[1] - rows_out_of_bounds[0] : start[1] + rows_out_of_bounds[0] };
+                Coordinates2D end_in{ is_width_flipped ? start_in.x - cols_in_bounds + 1 : start_in.x + cols_in_bounds - 1,
+                                      is_height_flipped ? start_in.y - rows_in_bounds + 1 : start_in.y + rows_in_bounds - 1 };
+                auto kernel = arm_compute::support::cpp14::make_unique<CLCropKernel>();
+
+                kernel->configure(compile_context, _input, _crop_results[num_box].get(), start_in, end_in, batch_index, extrapolation_value, &slice_in);
+                _internal_kernels.push_back(std::move(kernel));
+            }
+        }
+
+        // Fill all rows after the in bounds elements with the extrapolation value.
+        if(rows_out_of_bounds[1] > 0)
+        {
+            Window slice_fill_rows_after(full_window);
+            slice_fill_rows_after.set(2, Window::Dimension(_crop_results[num_box].get()->info()->dimension(2) - rows_out_of_bounds[1], _crop_results[num_box].get()->info()->dimension(2), 1));
+            auto kernel = arm_compute::support::cpp14::make_unique<CLMemsetKernel>();
+            kernel->configure(compile_context, _crop_results[num_box].get(), extrapolation_value, &slice_fill_rows_after);
+            _internal_kernels.push_back(std::move(kernel));
+        }
     }
     _boxes->unmap(CLScheduler::get().queue());
     _box_ind->unmap(CLScheduler::get().queue());
     CLScheduler::get().sync();
+}
+
+void CLCropResize::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_output == nullptr, "Unconfigured function");
+
+    for(unsigned int i = 0; i < _internal_kernels.size(); ++i)
+    {
+        CLScheduler::get().enqueue(*(_internal_kernels[i]));
+    }
+
+    CLScheduler::get().sync();
     for(auto &kernel : _scale)
     {
         kernel->run();
index 7aa771428de6a2950aa73854f834354dffc4492e..62e7d9a582c25686818cc7b3923667c305177969 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,12 @@ CLDeconvolutionLayer::CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
 
 void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
                                      const WeightsInfo &weights_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info, weights_info);
+}
+
+void CLDeconvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
+                                     const WeightsInfo &weights_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 
@@ -51,14 +57,14 @@ void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const
         case DeconvolutionMethod::DIRECT:
         {
             auto f = arm_compute::support::cpp14::make_unique<CLDirectDeconvolutionLayer>();
-            f->configure(input, weights, bias, output, deconv_info, weights_info);
+            f->configure(compile_context, input, weights, bias, output, deconv_info, weights_info);
             _function = std::move(f);
             break;
         }
         case DeconvolutionMethod::GEMM:
         {
             auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
-            f->configure(input, weights, bias, output, deconv_info);
+            f->configure(compile_context, input, weights, bias, output, deconv_info);
             _function = std::move(f);
             break;
         }
index eaf7c66083b269455a33067a5cb840c9227ae342..be2d120dcdfa88da5ddfd044fe36374f4513c4f6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,12 +43,17 @@ Status CLDeconvolutionLayerUpsample::validate(const ITensorInfo *input, const IT
 }
 
 void CLDeconvolutionLayerUpsample::configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, info);
+}
+
+void CLDeconvolutionLayerUpsample::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PadStrideInfo &info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     _output = output;
-    _memset.configure(_output, PixelValue(0, _output->info()->data_type(), _output->info()->quantization_info()));
-    _upsample.configure(input, _output, info);
+    _memset.configure(compile_context, _output, PixelValue(0, _output->info()->data_type(), _output->info()->quantization_info()));
+    _upsample.configure(compile_context, input, _output, info);
 }
 
 void CLDeconvolutionLayerUpsample::run()
index 88e3cbcc17ac2a36d15ca98cfb2af59e0158d737..b848f989e6c009de80968a7be1af8e1f33580e56 100644 (file)
 namespace arm_compute
 {
 void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, policy, shift);
+}
+
+void CLDepthConvertLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
-    k->configure(input, output, policy, shift);
+    k->configure(compile_context, input, output, policy, shift);
     _kernel = std::move(k);
 }
 
index 5af979539a90bde0ee0ffe860f2bf0c3ee985ee4..89e5faa4d56236efd57f72653e5bbc5f454eb0da 100644 (file)
 namespace arm_compute
 {
 void CLDepthToSpaceLayer::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape);
+}
+
+void CLDepthToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceLayerKernel>();
-    k->configure(input, output, block_shape);
+    k->configure(compile_context, input, output, block_shape);
     _kernel = std::move(k);
 }
 
index 00f6f6977137678adb7b704d23a5389bf2231e75..0b7a33401d2926db6b58f0c72320ee1cfb691db3 100644 (file)
@@ -125,7 +125,14 @@ CLDepthwiseConvolutionLayer3x3::CLDepthwiseConvolutionLayer3x3(std::shared_ptr<I
 void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                                ActivationLayerInfo act_info, const Size2D &dilation)
 {
-    _func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+    _func.configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+}
+
+void CLDepthwiseConvolutionLayer3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                                               const PadStrideInfo &conv_info, unsigned int depth_multiplier,
+                                               ActivationLayerInfo act_info, const Size2D &dilation)
+{
+    _func.configure(compile_context, input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
 }
 
 Status CLDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
@@ -166,6 +173,13 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConv
 
 void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+}
+
+void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
+                                                                                ICLTensor *output, const PadStrideInfo &conv_info,
+                                                                                unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayer::validate(input->info(),
@@ -193,11 +207,11 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(
         _memory_group.manage(&_permuted_output);
 
         // Configure the function to transform the input tensor from NCHW -> NHWC
-        _permute_input_to_nhwc.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
+        _permute_input_to_nhwc.configure(compile_context, input, &_permuted_input, PermutationVector(2U, 0U, 1U));
         _permuted_input.info()->set_data_layout(DataLayout::NHWC);
 
         // Configure the function to transform the weights tensor from IHW -> HWI
-        _permute_weights_to_nhwc.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
+        _permute_weights_to_nhwc.configure(compile_context, weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
         _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
 
         // Set output quantization info before dwc kernel configure
@@ -226,7 +240,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(
     dwc_weights_info.n0 = (depth_multiplier == 1) ? 8 : 1;
     DWCKernelInfo dwc_info;
     dwc_info.activation_info = act_info;
-    _dwc_native_kernel.configure(input_to_use, weights_to_use, biases, output_to_use,
+    _dwc_native_kernel.configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,
                                  dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation,
                                  output_multipliers_to_use, output_shifts_to_use);
 
@@ -236,7 +250,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(
 
         // Configure the function to transform the convoluted output to NCHW format
         _permuted_output.info()->set_data_layout(DataLayout::NCHW);
-        _permute_output_to_nchw.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
+        _permute_output_to_nchw.configure(compile_context, &_permuted_output, output, PermutationVector(1U, 2U, 0U));
         _permuted_output.allocator()->allocate();
     }
 
@@ -385,6 +399,13 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::CLDepthwise
 
 void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                                                                     const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+}
+
+void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
+                                                                                    ICLTensor           *output,
+                                                                                    const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
 {
     const GPUTarget gpu_target = CLScheduler::get().target();
 
@@ -429,11 +450,11 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
         _memory_group.manage(&_permuted_output);
 
         // Configure the function to transform the input tensor from NHWC -> NCHW
-        _permute_input_to_nchw.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
+        _permute_input_to_nchw.configure(compile_context, input, &_permuted_input, PermutationVector(1U, 2U, 0U));
         _permuted_input.info()->set_data_layout(DataLayout::NCHW);
 
         // Configure the function to transform the weights tensor from HWI -> IHW
-        _permute_weights_to_nchw.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
+        _permute_weights_to_nchw.configure(compile_context, weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
         _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
         _permuted_output.info()->set_quantization_info(output->info()->quantization_info());
 
@@ -447,7 +468,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
     {
         if(_needs_weights_reshape)
         {
-            _reshape_weights.configure(weights, &_permuted_weights, info);
+            _reshape_weights.configure(compile_context, weights, &_permuted_weights, info);
             weights_to_use = &_permuted_weights;
         }
         _kernel = arm_compute::support::cpp14::make_unique<CLDepthwiseConvolutionLayer3x3NHWCKernel>();
@@ -473,7 +494,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
 
     // Configure kernel
     _kernel->set_target(gpu_target);
-    _kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier,
+    _kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier,
                        act_info, dilation, output_multipliers_to_use, output_shifts_to_use);
 
     if(_is_quantized)
@@ -487,7 +508,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
     {
         // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
         _permuted_output.info()->set_data_layout(DataLayout::NCHW);
-        _permute_output_to_nhwc.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
+        _permute_output_to_nhwc.configure(compile_context, &_permuted_output, output, PermutationVector(2U, 0U, 1U));
 
         // Allocate tensors
         _permuted_input.allocator()->allocate();
@@ -499,7 +520,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
     {
         zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
     }
-    _border_handler.configure(input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
+    _border_handler.configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
 }
 
 Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
@@ -574,6 +595,14 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer(std::shared_ptr<IMemory
 
 void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                             ActivationLayerInfo act_info, const Size2D &dilation)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+}
+
+void CLDepthwiseConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                                            const PadStrideInfo &conv_info,
+                                            unsigned int        depth_multiplier,
+                                            ActivationLayerInfo act_info, const Size2D &dilation)
 {
     const GPUTarget gpu_target = CLScheduler::get().target();
     _depth_conv_func           = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info,
@@ -582,12 +611,12 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
     {
         case DepthwiseConvolutionFunction::OPTIMIZED:
             _func_3x3.set_memory_group(_memory_manager);
-            _func_3x3.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+            _func_3x3.configure(compile_context, input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
             break;
         case DepthwiseConvolutionFunction::GENERIC:
         {
             _func_generic.set_memory_group(_memory_manager);
-            _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+            _func_generic.configure(compile_context, input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
         }
         break;
         default:
index 6e4aab2eb9d3f576b89a5fd0c6c55c2feda9a083..362b36cc95285dab7021efce0f61ac1e0010d529 100644 (file)
 namespace arm_compute
 {
 void CLDequantizationLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLDequantizationLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLDequantizationLayerKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
 
index 799724bf2d80a9bfcafc2db77a45079e80fb2a21..68d3752463c41e81940f0c798f7c72f9dc0a7e80 100644 (file)
 using namespace arm_compute;
 
 void CLDerivative::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
+}
+
+void CLDerivative::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLDerivativeKernel>();
-    k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
 }
index 5962c1dc88b91371265493673116a396d68336d6..05351a9de33ed16e77c0c8c5d7af936f9f81d03f 100644 (file)
 using namespace arm_compute;
 
 void CLDilate::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
+}
+
+void CLDilate::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLDilateKernel>();
-    k->configure(input, output, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
 }
index 9828cd10ecbbe144c802e39f44d94036d07e5334..6e9782f77a6cd75f90451a26996a688eec7b52fc 100644 (file)
@@ -38,12 +38,19 @@ CLDirectConvolutionLayer::CLDirectConvolutionLayer()
 }
 
 void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, act_info);
+}
+
+void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                                         const PadStrideInfo &conv_info,
+                                         const ActivationLayerInfo &act_info)
 {
     // Set GPU target
     _direct_conv_kernel.set_target(CLScheduler::get().target());
 
     // Configure direct convolution
-    _direct_conv_kernel.configure(input, weights, biases, output, conv_info);
+    _direct_conv_kernel.configure(compile_context, input, weights, biases, output, conv_info);
 
     // Configure border handler
     PixelValue &&zero_value(0.f);
@@ -51,7 +58,7 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig
     {
         zero_value = PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
     }
-    _input_border_handler.configure(input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value);
+    _input_border_handler.configure(compile_context, input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value);
 
     // Tune kernels
     CLScheduler::get().tune_kernel_static(_direct_conv_kernel);
@@ -61,7 +68,7 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig
     //Configure Activation Layer
     if(_is_activationlayer_enabled)
     {
-        _activationlayer_function.configure(output, nullptr, act_info);
+        _activationlayer_function.configure(compile_context, output, nullptr, act_info);
     }
 }
 
index 8d90723c9540eee7d824526c3a2439782e57f7db..da16bed3e0708223b67849746ee56845beaee23e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -87,10 +87,10 @@ Status CLDirectDeconvolutionLayer::validate(const ITensorInfo *input, const ITen
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h], "Output's height is invalid.");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c], "Output's depth is invalid.");
 
-    unsigned int        deconv_pad_x = 0;
-    unsigned int        deconv_pad_y = 0;
-    const unsigned int  stride_x = info.stride().first;
-    const unsigned int  stride_y = info.stride().second;
+    unsigned int        deconv_pad_x    = 0;
+    unsigned int        deconv_pad_y    = 0;
+    const unsigned int  stride_x        = info.stride().first;
+    const unsigned int  stride_y        = info.stride().second;
     const TensorShape   scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y);
     TensorInfo          scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape).set_data_layout(data_layout));
     const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
@@ -103,6 +103,12 @@ Status CLDirectDeconvolutionLayer::validate(const ITensorInfo *input, const ITen
 
 void CLDirectDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
                                            const WeightsInfo &weights_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, info, weights_info);
+}
+
+void CLDirectDeconvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+                                           const WeightsInfo &weights_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 
@@ -110,8 +116,8 @@ void CLDirectDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights,
     const unsigned int pad_right  = info.pad_right();
     const unsigned int pad_top    = info.pad_top();
     const unsigned int pad_bottom = info.pad_bottom();
-    const unsigned int stride_x = info.stride().first;
-    const unsigned int stride_y = info.stride().second;
+    const unsigned int stride_x   = info.stride().first;
+    const unsigned int stride_y   = info.stride().second;
 
     const DataLayout data_layout = input->info()->data_layout();
 
@@ -121,7 +127,7 @@ void CLDirectDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights,
     _original_weights = weights;
     _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
     _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
-    _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
+    _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis);
 
     auto out_dims = deconvolution_output_dimensions(input->info()->dimension(idx_w), input->info()->dimension(idx_h), weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info);
 
@@ -146,14 +152,14 @@ void CLDirectDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights,
     unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0;
     deconv_pad_x -= deconv_pad_left + deconv_pad_right;
     ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0);
-    deconv_pad_left  += deconv_pad_x / 2;
+    deconv_pad_left += deconv_pad_x / 2;
     deconv_pad_right += deconv_pad_x / 2;
 
     unsigned int deconv_pad_top    = pad_bottom > pad_top ? pad_bottom - pad_top : 0;
     unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0;
     deconv_pad_y -= deconv_pad_top + deconv_pad_bottom;
     ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0);
-    deconv_pad_top    += deconv_pad_y / 2;
+    deconv_pad_top += deconv_pad_y / 2;
     deconv_pad_bottom += deconv_pad_y / 2;
 
     TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
@@ -162,11 +168,11 @@ void CLDirectDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights,
 
     // configure scale function
     const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR);
-    _scale_f.configure(input, &_scaled_output, upsample_info);
+    _scale_f.configure(compile_context, input, &_scaled_output, upsample_info);
 
     // Setup the function to convolve the upscaled output
     const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-    _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, weights_info);
+    _conv_f.configure(compile_context, &_scaled_output, &_weights_flipped, bias, output, conv_info, weights_info);
     _scaled_output.allocator()->allocate();
 
     // Setup flip axis data
index 9955d240f9fc9aef10c8387b1a0c3917e7cc0932..ce615327a95a049bd44234969fbb414ac93ce507 100644 (file)
 namespace arm_compute
 {
 void CLRsqrtLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLRsqrtLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLElementWiseUnaryLayerKernel>();
-    k->configure(input, output, ElementWiseUnary::RSQRT);
+    k->configure(compile_context, input, output, ElementWiseUnary::RSQRT);
     _kernel = std::move(k);
 }
 Status CLRsqrtLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
@@ -42,9 +47,14 @@ Status CLRsqrtLayer::validate(const ITensorInfo *input, const ITensorInfo *outpu
 }
 
 void CLExpLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLExpLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLElementWiseUnaryLayerKernel>();
-    k->configure(input, output, ElementWiseUnary::EXP);
+    k->configure(compile_context, input, output, ElementWiseUnary::EXP);
     _kernel = std::move(k);
 }
 Status CLExpLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
@@ -53,9 +63,14 @@ Status CLExpLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
 }
 
 void CLNegLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLNegLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLElementWiseUnaryLayerKernel>();
-    k->configure(input, output, ElementWiseUnary::NEG);
+    k->configure(compile_context, input, output, ElementWiseUnary::NEG);
     _kernel = std::move(k);
 }
 Status CLNegLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
@@ -64,9 +79,14 @@ Status CLNegLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
 }
 
 void CLSinLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLSinLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLElementWiseUnaryLayerKernel>();
-    k->configure(input, output, ElementWiseUnary::SIN);
+    k->configure(compile_context, input, output, ElementWiseUnary::SIN);
     _kernel = std::move(k);
 }
 Status CLSinLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
@@ -75,9 +95,14 @@ Status CLSinLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
 }
 
 void CLAbsLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLAbsLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLElementWiseUnaryLayerKernel>();
-    k->configure(input, output, ElementWiseUnary::ABS);
+    k->configure(compile_context, input, output, ElementWiseUnary::ABS);
     _kernel = std::move(k);
 }
 Status CLAbsLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
@@ -85,9 +110,14 @@ Status CLAbsLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
     return CLElementWiseUnaryLayerKernel::validate(input, output, ElementWiseUnary::ABS);
 }
 void CLLogLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLLogLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLElementWiseUnaryLayerKernel>();
-    k->configure(input, output, ElementWiseUnary::LOG);
+    k->configure(compile_context, input, output, ElementWiseUnary::LOG);
     _kernel = std::move(k);
 }
 Status CLLogLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
@@ -96,9 +126,14 @@ Status CLLogLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
 }
 
 void CLRoundLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLRoundLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLElementWiseUnaryLayerKernel>();
-    k->configure(input, output, ElementWiseUnary::ROUND);
+    k->configure(compile_context, input, output, ElementWiseUnary::ROUND);
     _kernel = std::move(k);
 }
 Status CLRoundLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
index 7636a87e934f0c7fa43a058de8b0082f587023db..20e9545b61e5ece0b7e000aca5e0bd5be41a2f5a 100644 (file)
@@ -33,7 +33,7 @@ namespace arm_compute
 {
 namespace
 {
-void configure_border_handler(CLFillBorderKernel &border_handler, BorderSize border_size, ICLTensor *input1, ICLTensor *input2, const ICLTensor *output)
+void configure_border_handler(const CLCompileContext &compile_context, CLFillBorderKernel &border_handler, BorderSize border_size, ICLTensor *input1, ICLTensor *input2, const ICLTensor *output)
 {
     if(output->info()->dimension(0) > 1)
     {
@@ -41,18 +41,23 @@ void configure_border_handler(CLFillBorderKernel &border_handler, BorderSize bor
 
         if(broadcasted_info->info()->dimension(0) == 1)
         {
-            border_handler.configure(broadcasted_info, border_size, BorderMode::REPLICATE);
+            border_handler.configure(compile_context, broadcasted_info, border_size, BorderMode::REPLICATE);
         }
     }
 }
 } // namespace
 
 void CLArithmeticAddition::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, policy, act_info);
+}
+
+void CLArithmeticAddition::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLSaturatedArithmeticOperationKernel>();
-    k->configure(ArithmeticOperation::ADD, input1, input2, output, policy, act_info);
+    k->configure(compile_context, ArithmeticOperation::ADD, input1, input2, output, policy, act_info);
     _kernel = std::move(k);
-    configure_border_handler(_border_handler, _kernel->border_size(), input1, input2, output);
+    configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
 }
 
 Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
@@ -61,11 +66,16 @@ Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorIn
 }
 
 void CLArithmeticSubtraction::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, policy, act_info);
+}
+
+void CLArithmeticSubtraction::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLSaturatedArithmeticOperationKernel>();
-    k->configure(ArithmeticOperation::SUB, input1, input2, output, policy, act_info);
+    k->configure(compile_context, ArithmeticOperation::SUB, input1, input2, output, policy, act_info);
     _kernel = std::move(k);
-    configure_border_handler(_border_handler, _kernel->border_size(), input1, input2, output);
+    configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
 }
 
 Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
@@ -75,11 +85,16 @@ Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITenso
 }
 
 void CLArithmeticDivision::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info);
+}
+
+void CLArithmeticDivision::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
-    k->configure(ArithmeticOperation::DIV, input1, input2, output, act_info);
+    k->configure(compile_context, ArithmeticOperation::DIV, input1, input2, output, act_info);
     _kernel = std::move(k);
-    configure_border_handler(_border_handler, _kernel->border_size(), input1, input2, output);
+    configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
 }
 
 Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
@@ -88,11 +103,16 @@ Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorIn
 }
 
 void CLElementwiseMax::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info);
+}
+
+void CLElementwiseMax::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
-    k->configure(ArithmeticOperation::MAX, input1, input2, output, act_info);
+    k->configure(compile_context, ArithmeticOperation::MAX, input1, input2, output, act_info);
     _kernel = std::move(k);
-    configure_border_handler(_border_handler, _kernel->border_size(), input1, input2, output);
+    configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
 }
 
 Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
@@ -101,11 +121,16 @@ Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *
 }
 
 void CLElementwiseMin::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info);
+}
+
+void CLElementwiseMin::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
-    k->configure(ArithmeticOperation::MIN, input1, input2, output, act_info);
+    k->configure(compile_context, ArithmeticOperation::MIN, input1, input2, output, act_info);
     _kernel = std::move(k);
-    configure_border_handler(_border_handler, _kernel->border_size(), input1, input2, output);
+    configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
 }
 
 Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
@@ -114,11 +139,16 @@ Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *
 }
 
 void CLElementwiseSquaredDiff::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info);
+}
+
+void CLElementwiseSquaredDiff::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
-    k->configure(ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info);
+    k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info);
     _kernel = std::move(k);
-    configure_border_handler(_border_handler, _kernel->border_size(), input1, input2, output);
+    configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
 }
 
 Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
@@ -127,11 +157,16 @@ Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITens
 }
 
 void CLElementwisePower::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info);
+}
+
+void CLElementwisePower::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
-    k->configure(ArithmeticOperation::POWER, input1, input2, output, act_info);
+    k->configure(compile_context, ArithmeticOperation::POWER, input1, input2, output, act_info);
     _kernel = std::move(k);
-    configure_border_handler(_border_handler, _kernel->border_size(), input1, input2, output);
+    configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input1, input2, output);
 }
 
 Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
index a0663b754a3ab709d4e384eab3abad3236df73ee..e1bd7e6f2a375ce0edb8a02d7fae49a92b4df617 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -89,9 +89,14 @@ CLEqualizeHistogram::CLEqualizeHistogram()
 
 void CLEqualizeHistogram::configure(const ICLImage *input, ICLImage *output)
 {
-    _histogram_kernel.configure(input, &_hist);
-    _border_histogram_kernel.configure(input, &_hist);
-    _map_histogram_kernel.configure(input, &_cd_lut, output);
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLEqualizeHistogram::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output)
+{
+    _histogram_kernel.configure(compile_context, input, &_hist);
+    _border_histogram_kernel.configure(compile_context, input, &_hist);
+    _map_histogram_kernel.configure(compile_context, input, &_cd_lut, output);
 }
 
 void CLEqualizeHistogram::run()
index 48d7be385de9fa1288c18a9e48d03f05ee530726..810614831660573de7740c8a346e4e973d27df42 100644 (file)
 using namespace arm_compute;
 
 void CLErode::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
+}
+
+void CLErode::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLErodeKernel>();
-    k->configure(input, output, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
 }
index 49b5a2a2e6e29aac5f8047647a1ea086dc597928..c3922f5e66380e6420b170f99bc2cc2f75fba4ed 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,6 +36,11 @@ CLFFT1D::CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager)
 }
 
 void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
+}
+
+void CLFFT1D::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLFFT1D::validate(input->info(), output->info(), config));
@@ -57,7 +62,7 @@ void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DIn
     TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32);
     _digit_reverse_indices.allocator()->init(digit_reverse_indices_info);
     _memory_group.manage(&_digit_reversed_input);
-    _digit_reverse_kernel.configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
+    _digit_reverse_kernel.configure(compile_context, input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
 
     // Create and configure FFT kernels
     unsigned int Nx = 1;
@@ -72,7 +77,7 @@ void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DIn
         fft_kernel_info.radix          = radix_for_stage;
         fft_kernel_info.Nx             = Nx;
         fft_kernel_info.is_first_stage = (i == 0);
-        _fft_kernels[i].configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
+        _fft_kernels[i].configure(compile_context, &_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
 
         Nx *= radix_for_stage;
     }
@@ -83,7 +88,7 @@ void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DIn
         FFTScaleKernelInfo scale_config;
         scale_config.scale     = static_cast<float>(N);
         scale_config.conjugate = config.direction == FFTDirection::Inverse;
-        is_c2r ? _scale_kernel.configure(&_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config);
+        is_c2r ? _scale_kernel.configure(compile_context, &_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config);
     }
 
     // Allocate tensors
index f5776cbd889da260e934f9d828fb1980276261da..2482ea901ac505a6d87b140d20155df9319c8397 100644 (file)
@@ -35,6 +35,11 @@ CLFFT2D::CLFFT2D(std::shared_ptr<IMemoryManager> memory_manager)
 }
 
 void CLFFT2D::configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
+}
+
+void CLFFT2D::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLFFT2D::validate(input->info(), output->info(), config));
@@ -44,13 +49,13 @@ void CLFFT2D::configure(const ICLTensor *input, ICLTensor *output, const FFT2DIn
     first_pass_config.axis      = config.axis0;
     first_pass_config.direction = config.direction;
     _memory_group.manage(&_first_pass_tensor);
-    _first_pass_func.configure(input, &_first_pass_tensor, first_pass_config);
+    _first_pass_func.configure(compile_context, input, &_first_pass_tensor, first_pass_config);
 
     // Setup second pass
     FFT1DInfo second_pass_config;
     second_pass_config.axis      = config.axis1;
     second_pass_config.direction = config.direction;
-    _second_pass_func.configure(&_first_pass_tensor, output, second_pass_config);
+    _second_pass_func.configure(compile_context, &_first_pass_tensor, output, second_pass_config);
     _first_pass_tensor.allocator()->allocate();
 }
 
index afb1cab520d6df03ae58e7a5b186daab35ec0eb3..ff439cca8daabaa4328a31e6bc094ee24b54b339 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -97,6 +97,12 @@ CLFFTConvolutionLayer::CLFFTConvolutionLayer(std::shared_ptr<IMemoryManager> mem
 
 void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                                       const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, act_info);
+}
+
+void CLFFTConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                                      const ActivationLayerInfo &act_info)
 {
     _original_weights = weights;
     _original_bias    = biases;
@@ -121,7 +127,7 @@ void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights
     // Permute bias
     if(biases != nullptr)
     {
-        _permute_bias_func.configure(biases, &_permuted_bias, PermutationVector(1U, 2U, 0U));
+        _permute_bias_func.configure(compile_context, biases, &_permuted_bias, PermutationVector(1U, 2U, 0U));
         _permuted_bias.info()->set_data_layout(DataLayout::NCHW);
     }
 
@@ -131,11 +137,11 @@ void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights
     {
         _memory_group.manage(&_permuted_input);
         // Configure the function to transform the input tensor from NHWC -> NCHW
-        _permute_input_func.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
+        _permute_input_func.configure(compile_context, input, &_permuted_input, PermutationVector(1U, 2U, 0U));
         _permuted_input.info()->set_data_layout(DataLayout::NCHW);
 
         // Configure the function to transform the weights tensor from HWI -> IHW
-        _permute_weights_func.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
+        _permute_weights_func.configure(compile_context, weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
         _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
 
         input_to_use   = &_permuted_input;
@@ -145,20 +151,20 @@ void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights
     // Flip weights
     _flipped_weights.allocator()->init(weights_to_use->info()->clone()->set_is_resizable(true).reset_padding());
     _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
-    _flip_weights_func.configure(weights_to_use, &_flipped_weights, &_flip_axis);
+    _flip_weights_func.configure(compile_context, weights_to_use, &_flipped_weights, &_flip_axis);
 
     // Pad weights
     const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
-    _pad_weights_func.configure(&_flipped_weights, &_padded_weights, padding_w);
+    _pad_weights_func.configure(compile_context, &_flipped_weights, &_padded_weights, padding_w);
 
     // Transform weights
     _transform_weights_func = support::cpp14::make_unique<CLFFT2D>();
-    _transform_weights_func->configure(&_padded_weights, &_transformed_weights, FFT2DInfo());
+    _transform_weights_func->configure(compile_context, &_padded_weights, &_transformed_weights, FFT2DInfo());
 
     // Pad input
     const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
     _memory_group.manage(&_padded_input);
-    _pad_input_func.configure(input_to_use, &_padded_input, padding_in);
+    _pad_input_func.configure(compile_context, input_to_use, &_padded_input, padding_in);
     if(_needs_permute)
     {
         _permuted_input.allocator()->allocate();
@@ -166,17 +172,17 @@ void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights
 
     // Transform input
     _memory_group.manage(&_transformed_input);
-    _transform_input_func.configure(&_padded_input, &_transformed_input, FFT2DInfo());
+    _transform_input_func.configure(compile_context, &_padded_input, &_transformed_input, FFT2DInfo());
     _padded_input.allocator()->allocate();
 
     // Perform product
     _memory_group.manage(&_output_product);
-    _prod_func.configure(&_transformed_input, &_transformed_weights, &_output_product);
+    _prod_func.configure(compile_context, &_transformed_input, &_transformed_weights, &_output_product);
     _transformed_input.allocator()->allocate();
 
     // Perform reduction
     _memory_group.manage(&_output_reduced);
-    _reduce_func.configure(&_output_product, &_output_reduced, 2, ReductionOperation::SUM);
+    _reduce_func.configure(compile_context, &_output_product, &_output_reduced, 2, ReductionOperation::SUM);
     _output_product.allocator()->allocate();
 
     // Transform output
@@ -184,7 +190,7 @@ void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights
     FFT2DInfo itranform_info;
     itranform_info.direction = FFTDirection::Inverse;
     _itransformed_output.allocator()->init(_output_reduced.info()->clone()->set_is_resizable(true).set_num_channels(1).reset_padding());
-    _itransform_output_func.configure(&_output_reduced, &_itransformed_output, itranform_info);
+    _itransform_output_func.configure(compile_context, &_output_reduced, &_itransformed_output, itranform_info);
     _output_reduced.allocator()->allocate();
 
     // Reshape output
@@ -206,7 +212,7 @@ void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights
         output_to_use = &_permuted_output;
         _memory_group.manage(&_permuted_output);
     }
-    _extract_output_func.configure(&_reshaped_output, output_to_use, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
+    _extract_output_func.configure(compile_context, &_reshaped_output, output_to_use, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
     _itransformed_output.allocator()->allocate();
 
     // Add bias
@@ -219,7 +225,7 @@ void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights
             _memory_group.manage(&_permuted_output);
         }
         auto_init_if_empty(*output_to_use->info(), *_bias_output.info());
-        _bias_add_func.configure(&_bias_output, &_permuted_bias, output_to_use, ConvertPolicy::WRAP);
+        _bias_add_func.configure(compile_context, &_bias_output, &_permuted_bias, output_to_use, ConvertPolicy::WRAP);
         _bias_output.allocator()->allocate();
     }
 
@@ -228,7 +234,7 @@ void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights
     {
         // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
         _permuted_output.info()->set_data_layout(DataLayout::NCHW);
-        _permute_output_func.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
+        _permute_output_func.configure(compile_context, &_permuted_output, output, PermutationVector(2U, 0U, 1U));
 
         // Allocate tensors
         _permuted_output.allocator()->allocate();
@@ -238,7 +244,7 @@ void CLFFTConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights
     _is_activationlayer_enabled = act_info.enabled();
     if(_is_activationlayer_enabled)
     {
-        _activation_layer_func.configure(output, nullptr, act_info);
+        _activation_layer_func.configure(compile_context, output, nullptr, act_info);
     }
 
     // Setup flip axis data
index fe2a18cd302f284cdcc71a4ce4958f3f57ffa1c8..f51abf0880e6283ef886ea44f397c023a1f3e501 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,12 @@ CLFastCorners::CLFastCorners(std::shared_ptr<IMemoryManager> memory_manager)
 
 void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners,
                               unsigned int *num_corners, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, threshold, nonmax_suppression, corners, num_corners, border_mode, constant_border_value);
+}
+
+void CLFastCorners::configure(const CLCompileContext &compile_context, const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners,
+                              unsigned int *num_corners, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
     ARM_COMPUTE_ERROR_ON(BorderMode::UNDEFINED != border_mode);
@@ -72,19 +78,19 @@ void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonma
     const bool update_number = (nullptr != _num_corners);
 
     _memory_group.manage(&_output);
-    _fast_corners_kernel.configure(input, &_output, threshold, nonmax_suppression, border_mode);
+    _fast_corners_kernel.configure(compile_context, input, &_output, threshold, nonmax_suppression, border_mode);
 
     if(!_non_max)
     {
-        _copy_array_kernel.configure(&_output, update_number, _corners, &_num_buffer);
+        _copy_array_kernel.configure(compile_context, &_output, update_number, _corners, &_num_buffer);
     }
     else
     {
         _suppr.allocator()->init(tensor_info);
         _memory_group.manage(&_suppr);
 
-        _suppr_func.configure(&_output, &_suppr, border_mode);
-        _copy_array_kernel.configure(&_suppr, update_number, _corners, &_num_buffer);
+        _suppr_func.configure(compile_context, &_output, &_suppr, border_mode);
+        _copy_array_kernel.configure(compile_context, &_suppr, update_number, _corners, &_num_buffer);
 
         _suppr.allocator()->allocate();
     }
index 035bb7ce8d3a1cf89fb03e34d2f2ef173158914c..7b96ed1592c812231bdeee7d50e62103fb984d50 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
 
 namespace arm_compute
 {
-
 void CLFill::configure(ICLTensor *tensor, PixelValue constant_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), tensor, constant_value);
+}
+
+void CLFill::configure(const CLCompileContext &compile_context, ICLTensor *tensor, PixelValue constant_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLMemsetKernel>();
-    k->configure(tensor, constant_value);
+    k->configure(compile_context, tensor, constant_value);
     _kernel = std::move(k);
 }
 } // namespace arm_compute
index b7d77bcbb7401bebeecd27c2266a2196eee08507..f9d7396c5b9b68c8f9a64f3e2f6f51faa96d88de 100644 (file)
 using namespace arm_compute;
 
 void CLFillBorder::configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), tensor, border_width, border_mode, constant_border_value);
+}
+
+void CLFillBorder::configure(const CLCompileContext &compile_context, ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLFillBorderKernel>();
-    k->configure(tensor, BorderSize(border_width), border_mode, constant_border_value);
+    k->configure(compile_context, tensor, BorderSize(border_width), border_mode, constant_border_value);
     _kernel = std::move(k);
 }
index 8f93e03c5e76644f3771d0697987880642eeba8f..9a247ccfcb07b55bbf58fcf16cf2e063a8d75b68 100644 (file)
 using namespace arm_compute;
 
 void CLFlattenLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLFlattenLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLFlattenLayerKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
     CLScheduler::get().tune_kernel_static(*_kernel);
 }
index 204ca7400c782be3ba31d0554d042ce94ee5be02..44e1d39dc2154cf5026689474196beb8346641ba 100644 (file)
@@ -33,7 +33,7 @@ void CLFloor::configure(const ICLTensor *input, ICLTensor *output)
     configure(CLKernelLibrary::get().get_compile_context(), input, output);
 }
 
-void CLFloor::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
+void CLFloor::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLFloorKernel>();
     k->configure(compile_context, input, output);
index 0c0fbd5c9d7dbfae41154f3e3845d78dbe4cec8b..ecbac6f703dc2228e6b7c8149eddea5e6fb88442 100644 (file)
@@ -146,9 +146,14 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
 } // namespace
 
 void CLFullyConnectedLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLFullyConnectedLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
 
@@ -163,7 +168,8 @@ CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> mem
       _are_weights_reshaped(true), _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
 {
 }
-void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info)
+void CLFullyConnectedLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+                                         const FullyConnectedLayerInfo &fc_info)
 {
     GEMMLowpOutputStageInfo gemmlowp_output_stage;
     construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage, fc_info.activation_info);
@@ -190,7 +196,7 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor
         weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
 
         // Configure gemmlowp function
-        _mm_gemmlowp.configure(input, weights, bias, output, gemm_info);
+        _mm_gemmlowp.configure(compile_context, input, weights, bias, output, gemm_info);
 
         // Revert back QuantizatioInfo as input and weights could be used in other fully connected layers
         input->info()->set_quantization_info(input_quantization_info);
@@ -199,11 +205,12 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor
     else
     {
         // Configure matrix multiply kernel
-        _mm_gemm.configure(input, weights, bias, output, 1.f, 1.f, gemm_info);
+        _mm_gemm.configure(compile_context, input, weights, bias, output, 1.f, 1.f, gemm_info);
     }
 }
 
-void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info)
+void CLFullyConnectedLayer::configure_conv_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+                                              const FullyConnectedLayerInfo &fc_info)
 {
     ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
 
@@ -215,25 +222,32 @@ void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLT
 
     // Configure flatten kernel
     _memory_group.manage(&_flatten_output);
-    _flatten_layer.configure(input, &_flatten_output);
+    _flatten_layer.configure(compile_context, input, &_flatten_output);
 
     // Configure matrix multiply kernel
-    configure_mm(&_flatten_output, weights, bias, output, fc_info);
+    configure_mm(compile_context, &_flatten_output, weights, bias, output, fc_info);
 
     // Allocate the output tensor for flatten once all the configure methods have been called
     _flatten_output.allocator()->allocate();
 }
 
-void CLFullyConnectedLayer::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info)
+void CLFullyConnectedLayer::configure_fc_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+                                            const FullyConnectedLayerInfo &fc_info)
 {
     ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
 
     // Configure matrix multiply kernel
-    configure_mm(input, weights, bias, output, fc_info);
+    configure_mm(compile_context, input, weights, bias, output, fc_info);
 }
 
 void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                       FullyConnectedLayerInfo fc_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, fc_info);
+}
+
+void CLFullyConnectedLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                                      FullyConnectedLayerInfo fc_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 
@@ -282,13 +296,13 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
     {
         if(_weights_manager && _weights_manager->are_weights_managed(weights))
         {
-            _reshape_weights_managed_function.configure(weights);
+            _reshape_weights_managed_function.configure(compile_context, weights);
             weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed_function));
         }
         else
         {
             // Reshape the weights
-            _reshape_weights_function.configure(weights, &_reshape_weights_output);
+            _reshape_weights_function.configure(compile_context, weights, &_reshape_weights_output);
             weights_to_use = &_reshape_weights_output;
         }
     }
@@ -298,7 +312,7 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
     {
         if(_weights_manager && _weights_manager->are_weights_managed(weights_to_use))
         {
-            _convert_weights_managed.configure(weights_to_use,
+            _convert_weights_managed.configure(compile_context, weights_to_use,
                                                input->info()->tensor_shape(),
                                                fc_info.weights_trained_layout);
             weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_convert_weights_managed));
@@ -306,7 +320,7 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
         else
         {
             // Convert weights
-            _convert_weights.configure(weights_to_use,
+            _convert_weights.configure(compile_context, weights_to_use,
                                        &_converted_weights_output,
                                        input->info()->tensor_shape(),
                                        fc_info.weights_trained_layout);
@@ -319,12 +333,12 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
     if(_is_fc_after_conv)
     {
         // Fully Connected layer after a Convolution Layer without batches
-        configure_conv_fc(input, weights_to_use, biases, output, fc_info);
+        configure_conv_fc(compile_context, input, weights_to_use, biases, output, fc_info);
     }
     else
     {
         // Fully Connected layer after a Fully Connected Layer without batches
-        configure_fc_fc(input, weights_to_use, biases, output, fc_info);
+        configure_fc_fc(compile_context, input, weights_to_use, biases, output, fc_info);
     }
 }
 
index 72dd27e3cc038bb0f3107ccfc19686044b1b1cbc..6deecdc089df20c0eb16d3077972fe726f156221 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,7 +41,15 @@ void CLFuseBatchNormalization::configure(const ICLTensor *input_weights, const I
                                          const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
                                          float epsilon, FuseBatchNormalizationType fbn_type)
 {
-    _fuse_bn_kernel.configure(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
+    configure(CLKernelLibrary::get().get_compile_context(), input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
+}
+
+void CLFuseBatchNormalization::configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var,
+                                         ICLTensor *fused_weights, ICLTensor *fused_bias,
+                                         const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
+                                         float epsilon, FuseBatchNormalizationType fbn_type)
+{
+    _fuse_bn_kernel.configure(compile_context, input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
 }
 
 Status CLFuseBatchNormalization::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
index 74d59cdad1fd810039acbacd80074adf436e704b..8466024c04e38a77ca9e69bcefcfde253868e437 100644 (file)
@@ -81,7 +81,8 @@ CLGEMMKernelType CLGEMM::select_gemm_kernel(unsigned int m, unsigned int n, unsi
     return gemm_kernel->select_kernel(params);
 }
 
-void CLGEMM::configure_native_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)
+void CLGEMM::configure_native_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta,
+                                 const GEMMInfo &gemm_info)
 {
     const unsigned int m          = gemm_info.reinterpret_input_as_3d() ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);
     const unsigned int n          = b->info()->dimension(0);
@@ -94,13 +95,14 @@ void CLGEMM::configure_native_v1(const ICLTensor *a, const ICLTensor *b, const I
     GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d(), gemm_info.broadcast_bias());
 
     // Configure and tune matrix multiply kernel
-    _mm_kernel.configure(a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
+    _mm_kernel.configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
 
     // Tune kernel statically
     CLScheduler::get().tune_kernel_static(_mm_kernel);
 }
 
-void CLGEMM::configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)
+void CLGEMM::configure_reshaped_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta,
+                                   const GEMMInfo &gemm_info)
 {
     bool               reinterpret_input_as_3d   = gemm_info.reinterpret_input_as_3d();
     const unsigned int m                         = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);
@@ -148,22 +150,22 @@ void CLGEMM::configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const
     }
 
     // Configure interleave kernel
-    _reshape_lhs_kernel.configure(a, &_tmp_a, lhs_info, reinterpret_input_as_3d);
+    _reshape_lhs_kernel.configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d);
 
     // Configure transpose kernel
     ICLTensor *reshaped_rhs = &_tmp_b;
     if(_weights_manager && _weights_manager->are_weights_managed(b))
     {
-        _reshape_rhs_kernel_managed.configure(b, rhs_info);
+        _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info);
         reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed));
     }
     else
     {
-        _reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info);
+        _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
     }
 
     // Configure and tune matrix multiply kernel
-    _mm_kernel.configure(&_tmp_a, reshaped_rhs, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
+    _mm_kernel.configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
 
     CLScheduler::get().tune_kernel_static(_mm_kernel);
 
@@ -176,7 +178,8 @@ void CLGEMM::configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const
     }
 }
 
-void CLGEMM::configure_reshaped(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)
+void CLGEMM::configure_reshaped_v2(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta,
+                                   const GEMMInfo &gemm_info)
 {
     DataType           data_type               = a->info()->data_type();
     bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
@@ -223,21 +226,21 @@ void CLGEMM::configure_reshaped(const ICLTensor *a, const ICLTensor *b, const IC
     // Configure lhs_info and rhs_info
     std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);
 
-    _reshape_lhs_kernel.configure(a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
+    _reshape_lhs_kernel.configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
 
     ICLTensor *reshaped_rhs = &_tmp_b;
     if(_weights_manager && _weights_manager->are_weights_managed(b))
     {
-        _reshape_rhs_kernel_managed.configure(b, rhs_info);
+        _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info);
         reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed));
     }
     else
     {
-        _reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info);
+        _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
     }
 
     // Configure and tune matrix multiply kernel
-    _mm_reshaped_kernel.configure(&_tmp_a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+    _mm_reshaped_kernel.configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
 
     // Allocate intermediate tensors
     _tmp_a.allocator()->allocate();
@@ -248,7 +251,8 @@ void CLGEMM::configure_reshaped(const ICLTensor *a, const ICLTensor *b, const IC
     }
 }
 
-void CLGEMM::configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)
+void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta,
+                                         const GEMMInfo &gemm_info)
 {
     DataType           data_type               = a->info()->data_type();
     bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
@@ -293,16 +297,16 @@ void CLGEMM::configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b,
     ICLTensor *reshaped_rhs = &_tmp_b;
     if(_weights_manager && _weights_manager->are_weights_managed(b))
     {
-        _reshape_rhs_kernel_managed.configure(b, rhs_info);
+        _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info);
         reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed));
     }
     else
     {
-        _reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info);
+        _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
     }
 
     // Configure and tune matrix multiply kernel
-    _mm_reshaped_only_rhs_kernel.configure(a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+    _mm_reshaped_only_rhs_kernel.configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
 
     if(!_reshape_b_only_on_first_run && use_mm_b)
     {
@@ -483,6 +487,11 @@ Status CLGEMM::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInf
 }
 
 void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), a, b, c, output, alpha, beta, gemm_info);
+}
+
+void CLGEMM::configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
 
@@ -511,22 +520,22 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
     {
         case CLGEMMKernelType::NATIVE_V1:
         {
-            configure_native_v1(a, b, c_to_use, output, alpha, beta, gemm_info);
+            configure_native_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
             break;
         }
         case CLGEMMKernelType::RESHAPED_V1:
         {
-            configure_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info);
+            configure_reshaped_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
             break;
         }
         case CLGEMMKernelType::RESHAPED:
         {
-            configure_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info);
+            configure_reshaped_v2(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
             break;
         }
         case CLGEMMKernelType::RESHAPED_ONLY_RHS:
         {
-            configure_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info);
+            configure_reshaped_only_rhs(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
             break;
         }
         default:
index 5398050533ad6d462b9f4b9a801da505fa61f491..1c37993bdace26b81e84a295ab6ed15f4e8443ce 100644 (file)
@@ -47,6 +47,11 @@ CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights()
 }
 
 void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), weights, biases, output, num_groups);
+}
+
+void CLConvolutionLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
 {
     // Perform validation step
     ARM_COMPUTE_ERROR_ON_NULLPTR(weights, output);
@@ -58,7 +63,7 @@ void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const
     const bool       append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
     const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr;
 
-    _weights_reshape_kernel.configure(weights, biases_to_use, output, num_groups);
+    _weights_reshape_kernel.configure(compile_context, weights, biases_to_use, output, num_groups);
 
     output->info()->set_quantization_info(weights->info()->quantization_info());
 }
@@ -100,7 +105,8 @@ CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> m
 {
 }
 
-void CLGEMMConvolutionLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
+void CLGEMMConvolutionLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                                          const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
                                           int gemm_3d_depth, const ActivationLayerInfo &act_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);
@@ -127,7 +133,7 @@ void CLGEMMConvolutionLayer::configure_mm(const ICLTensor *input, const ICLTenso
         input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
         weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
 
-        _mm_gemmlowp.configure(input, weights, biases, output, gemm_info);
+        _mm_gemmlowp.configure(compile_context, input, weights, biases, output, gemm_info);
 
         // Revert back QuantizatioInfo as input and weights could be used in other convolution layers
         input->info()->set_quantization_info(input_quantization_info);
@@ -136,7 +142,7 @@ void CLGEMMConvolutionLayer::configure_mm(const ICLTensor *input, const ICLTenso
     else
     {
         // Configure matrix multiply function
-        _mm_gemm.configure(input, weights, biases, output, 1.0f, 1.0f, gemm_info);
+        _mm_gemm.configure(compile_context, input, weights, biases, output, 1.0f, 1.0f, gemm_info);
     }
 }
 
@@ -180,6 +186,13 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens
 
 void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
                                        const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
+}
+
+void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                                       const PadStrideInfo &conv_info,
+                                       const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 
@@ -252,24 +265,24 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
 
         if(_weights_manager && _weights_manager->are_weights_managed(weights))
         {
-            _reshape_weights_managed.configure(weights, biases, num_groups);
+            _reshape_weights_managed.configure(compile_context, weights, biases, num_groups);
             weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
         }
         else
         {
-            _reshape_weights.configure(weights, biases, &_weights_reshaped, num_groups);
+            _reshape_weights.configure(compile_context, weights, biases, &_weights_reshaped, num_groups);
         }
     }
     else
     {
         if(_weights_manager && _weights_manager->are_weights_managed(weights))
         {
-            _reshape_weights_managed.configure(weights, nullptr, num_groups);
+            _reshape_weights_managed.configure(compile_context, weights, nullptr, num_groups);
             weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed));
         }
         else
         {
-            _reshape_weights.configure(weights, nullptr, &_weights_reshaped, num_groups);
+            _reshape_weights.configure(compile_context, weights, nullptr, &_weights_reshaped, num_groups);
         }
     }
 
@@ -279,7 +292,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
         _memory_group.manage(&_im2col_output);
 
         // Configure and tune im2col. im2col output shape is auto-initialized
-        _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);
+        _im2col_kernel.configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);
 
         // Set quantization info
         _im2col_output.info()->set_quantization_info(input->info()->quantization_info());
@@ -367,7 +380,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
     // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix
     const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
 
-    configure_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, act_info);
+    configure_mm(compile_context, gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, act_info);
 
     if(!_skip_im2col)
     {
@@ -377,7 +390,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
     if(!_skip_col2im)
     {
         // Configure and tune Col2Im
-        _col2im_kernel.configure(gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
+        _col2im_kernel.configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
         CLScheduler::get().tune_kernel_static(_col2im_kernel);
     }
 
@@ -391,7 +404,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
 
     if(!_fuse_activation)
     {
-        _activationlayer_function.configure(output, nullptr, act_info);
+        _activationlayer_function.configure(compile_context, output, nullptr, act_info);
     }
 
     ARM_COMPUTE_UNUSED(weights_info);
index 32988582154067d6dd56905e3c50686fd62f1b50..1dcb341fe7afbcf5445fda7db2b778e889d410f0 100644 (file)
@@ -28,7 +28,6 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "utils/TypePrinter.h"
 
 #include <memory>
 #include <tuple>
@@ -64,29 +63,29 @@ std::pair<Coordinates, Coordinates> compute_start_end_slice_coordinates(const IT
 }
 Status construct_gemmlowp_output_stage(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, GEMMLowpOutputStageInfo &output_stage_info)
 {
-        const auto data_type = input->data_type();
+    const auto data_type = input->data_type();
 
-        if(is_data_type_quantized_asymmetric(data_type))
-        {
-            const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
-            const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
-            const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
-
-            float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
-            int   output_multiplier(0);
-            int   output_shift(0);
-            ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
-
-            output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
-            output_stage_info.gemmlowp_multiplier = output_multiplier;
-            output_stage_info.gemmlowp_shift      = output_shift;
-            output_stage_info.gemmlowp_offset     = oq_info.offset;
-            const auto min_max_bound              = get_min_max(data_type);
-            output_stage_info.gemmlowp_min_bound  = (std::get<0>(min_max_bound)).get<int32_t>();
-            output_stage_info.gemmlowp_max_bound  = (std::get<1>(min_max_bound)).get<int32_t>();
-            output_stage_info.output_data_type    = data_type;
-        }
-        return Status{};
+    if(is_data_type_quantized_asymmetric(data_type))
+    {
+        const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
+        const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
+
+        float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
+        int   output_multiplier(0);
+        int   output_shift(0);
+        ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
+
+        output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+        output_stage_info.gemmlowp_multiplier = output_multiplier;
+        output_stage_info.gemmlowp_shift      = output_shift;
+        output_stage_info.gemmlowp_offset     = oq_info.offset;
+        const auto min_max_bound              = get_min_max(data_type);
+        output_stage_info.gemmlowp_min_bound  = (std::get<0>(min_max_bound)).get<int32_t>();
+        output_stage_info.gemmlowp_max_bound  = (std::get<1>(min_max_bound)).get<int32_t>();
+        output_stage_info.output_data_type    = data_type;
+    }
+    return Status{};
 }
 
 } // namespace
@@ -175,7 +174,6 @@ Status CLGEMMDeconvolutionLayer::validate(const ITensorInfo *input, const ITenso
         ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(&input->clone()->set_tensor_shape(nhwc_input_shape), &reshaped_t_info, nullptr, &gemm_output_info.set_data_type(DataType::S32),
                                                                            gemm_info));
         ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, output_stage_info));
-
     }
     else
     {
@@ -214,6 +212,12 @@ Status CLGEMMDeconvolutionLayer::validate(const ITensorInfo *input, const ITenso
 }
 
 void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info);
+}
+
+void CLGEMMDeconvolutionLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+                                         const PadStrideInfo &deconv_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLGEMMDeconvolutionLayer::validate(input->info(),
@@ -237,9 +241,9 @@ void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor
     if(_is_nchw)
     {
         _memory_group.manage(&_permuted_input);
-        _permute_input_to_nhwc.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
+        _permute_input_to_nhwc.configure(compile_context, input, &_permuted_input, PermutationVector(2U, 0U, 1U));
 
-        _permute_weights_to_nhwc.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
+        _permute_weights_to_nhwc.configure(compile_context, weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
 
         input_to_use   = &_permuted_input;
         weights_to_use = &_permuted_weights;
@@ -251,8 +255,8 @@ void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor
                                                    1,
                                                    input->info()->data_type(), weights->info()->quantization_info()));
 
-    _reshape_weights.configure(weights_to_use, &_reshaped_weights);
-    _transpose_weights.configure(&_reshaped_weights, &_reshaped_weights_t);
+    _reshape_weights.configure(compile_context, weights_to_use, &_reshaped_weights);
+    _transpose_weights.configure(compile_context, &_reshaped_weights, &_reshaped_weights_t);
 
     const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
     GEMMInfo     gemm_info(false, false, true, input->info()->dimension(idx_h), true);
@@ -268,14 +272,14 @@ void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor
         input_to_use->info()->set_quantization_info(QuantizationInfo(iq_info.uniform().scale, -iq_info.uniform().offset));
         _reshaped_weights_t.info()->set_quantization_info(QuantizationInfo(wq_info.uniform().scale, -wq_info.uniform().offset));
 
-        _mm_gemmlowp.configure(input_to_use, &_reshaped_weights_t, nullptr, &_gemm_output, gemm_info);
+        _mm_gemmlowp.configure(compile_context, input_to_use, &_reshaped_weights_t, nullptr, &_gemm_output, gemm_info);
 
         input_to_use->info()->set_quantization_info(iq_info);
         _reshaped_weights_t.info()->set_quantization_info(wq_info);
     }
     else
     {
-        _mm_gemm.configure(input_to_use, &_reshaped_weights_t, nullptr, &_gemm_output, 1.f, 0.0f, gemm_info);
+        _mm_gemm.configure(compile_context, input_to_use, &_reshaped_weights_t, nullptr, &_gemm_output, 1.f, 0.0f, gemm_info);
     }
 
     if(_is_nchw)
@@ -313,14 +317,14 @@ void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor
     }
 
     // Configure a Col2Im call to reshape the output of GEMM
-    _deconv_reshape.configure(&_gemm_output, bias, deconv_reshape_output, input->info(), weights->info(), deconv_info);
+    _deconv_reshape.configure(compile_context, &_gemm_output, bias, deconv_reshape_output, input->info(), weights->info(), deconv_info);
     _gemm_output.allocator()->allocate();
 
     if(_is_quantized)
     {
         GEMMLowpOutputStageInfo output_stage_info;
         construct_gemmlowp_output_stage(input->info(), weights->info(), output->info(), output_stage_info);
-        _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_stage_info);
+        _gemmlowp_output_stage.configure(compile_context, &_gemmlowp_final, nullptr, output_stage_output, output_stage_info);
         _gemmlowp_final.allocator()->allocate();
     }
 
@@ -328,7 +332,7 @@ void CLGEMMDeconvolutionLayer::configure(const ICLTensor *input, const ICLTensor
     if(_padded_input)
     {
         const auto start_end = compute_start_end_slice_coordinates(*deconv_reshape_output->info(), deconv_info, _is_nchw);
-        _slice_gemm.configure(&_slice_gemm_input, slice_output, start_end.first, start_end.second);
+        _slice_gemm.configure(compile_context, &_slice_gemm_input, slice_output, start_end.first, start_end.second);
         _slice_gemm_input.allocator()->allocate();
     }
 }
index 3465da95b7f4df1c96762b78c4692b292d81910c..84da4a7e98202881afcad2767191d673e94d69e1 100644 (file)
@@ -100,6 +100,11 @@ CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemo
 }
 
 void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), a, b, c, output, gemm_info);
+}
+
+void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));
@@ -144,7 +149,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
         TensorInfo weights_info(*b->info());
         weights_info.set_data_type(DataType::QASYMM8);
         _qasymm8_weights.allocator()->init(weights_info);
-        _weights_to_qasymm8.configure(b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
+        _weights_to_qasymm8.configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
     }
 
     const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
@@ -162,7 +167,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
         std::tie(lhs_info, rhs_info) = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
 
         // Configure reshape RHS kernel
-        _mtx_b_reshape_kernel.configure(_convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
+        _mtx_b_reshape_kernel.configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
     }
 
     // Using default reduction info
@@ -179,7 +184,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
         }
 
         // Configure Matrix B reduction kernel
-        _mtx_b_reduction_kernel.configure(_convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info);
+        _mtx_b_reduction_kernel.configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info);
     }
 
     // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
@@ -190,7 +195,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
         _memory_group.manage(&_vector_sum_row);
 
         // Configure matrix A reduction kernel
-        _mtx_a_reduction_kernel.configure(a, &_vector_sum_row, reduction_info);
+        _mtx_a_reduction_kernel.configure(compile_context, a, &_vector_sum_row, reduction_info);
     }
 
     GEMMKernelInfo gemm_kernel_info;
@@ -220,7 +225,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
         if(_is_gemm_reshaped && gemmlowp_output_stage.type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
         {
             // Configure and tune matrix multiply kernel with fused output stage
-            _mm_reshaped_only_rhs_kernel.configure(_matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col,
+            _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col,
                                                    _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
         }
         else
@@ -231,7 +236,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
 
             if(_is_gemm_reshaped)
             {
-                _mm_reshaped_only_rhs_kernel.configure(_matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info);
+                _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info);
             }
             else
             {
@@ -239,11 +244,11 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
                 std::tie(lhs_info, rhs_info) = CLGEMMNativeKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
 
                 // Configure matrix multiply kernel
-                _mm_native_kernel.configure(_matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
+                _mm_native_kernel.configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
 
-                _offset_contribution_output_stage_kernel.configure(&_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output, a->info()->dimension(0),
+                _offset_contribution_output_stage_kernel.configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output,
+                                                                   a->info()->dimension(0),
                                                                    _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
-
                 _mm_result_s32.allocator()->allocate();
             }
         }
@@ -264,7 +269,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
         if(_is_gemm_reshaped)
         {
             // Configure and tune matrix multiply kernel
-            _mm_reshaped_only_rhs_kernel.configure(_matrix_a, matrix_b, output, gemm_kernel_info);
+            _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info);
         }
         else
         {
@@ -272,11 +277,12 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor
             std::tie(lhs_info, rhs_info) = CLGEMMNativeKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
 
             // Configure matrix multiply kernel
-            _mm_native_kernel.configure(_matrix_a, matrix_b, output, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
+            _mm_native_kernel.configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
         }
 
         // Configure offset contribution kernel
-        _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset, _b_offset);
+        _offset_contribution_kernel.configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset,
+                                              _b_offset);
     }
 
     // Allocate tensors
index aff7f54a82efb8f4b4ffce86b277790973979ef0..18e002aa3d3735cfd7768983051761ae6dda549a 100644 (file)
@@ -43,7 +43,23 @@ void CLGEMMLowpQuantizeDownInt32ToUint8Scale::configure(const ICLTensor *input,
     info.gemmlowp_max_bound      = max;
 
     auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleKernel>();
-    k->configure(input, bias, output, &info);
+    k->configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, &info);
+    _kernel = std::move(k);
+}
+
+void CLGEMMLowpQuantizeDownInt32ToUint8Scale::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset,
+                                                        int result_mult_int,
+                                                        int result_shift, int min, int max)
+{
+    GEMMLowpOutputStageInfo info = GEMMLowpOutputStageInfo();
+    info.gemmlowp_offset         = result_offset;
+    info.gemmlowp_multiplier     = result_mult_int;
+    info.gemmlowp_shift          = result_shift;
+    info.gemmlowp_min_bound      = min;
+    info.gemmlowp_max_bound      = max;
+
+    auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleKernel>();
+    k->configure(compile_context, input, bias, output, &info);
     _kernel = std::move(k);
 }
 
@@ -59,9 +75,16 @@ Status CLGEMMLowpQuantizeDownInt32ToUint8Scale::validate(const ITensorInfo *inpu
 void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
                                                                     int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                                                                     int min, int max)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+}
+
+void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
+                                                                    int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+                                                                    int min, int max)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>();
-    k->configure(input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+    k->configure(compile_context, input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
     _kernel = std::move(k);
 }
 
@@ -76,7 +99,16 @@ void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(const ICLTens
                                                                    int min, int max)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>();
-    k->configure(input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+    k->configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+    _kernel = std::move(k);
+}
+
+void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
+                                                                   int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+                                                                   int min, int max)
+{
+    auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>();
+    k->configure(compile_context, input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
     _kernel = std::move(k);
 }
 
@@ -97,7 +129,22 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat::configure(const ICLTensor *
     info.gemmlowp_max_bound       = max;
 
     auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel>();
-    k->configure(input, bias, output, &info);
+    k->configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, &info);
+    _kernel = std::move(k);
+}
+
+void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
+                                                               float multiplier, int offset,
+                                                               int min, int max)
+{
+    GEMMLowpOutputStageInfo info  = GEMMLowpOutputStageInfo();
+    info.gemmlowp_offset          = offset;
+    info.gemmlowp_real_multiplier = multiplier;
+    info.gemmlowp_min_bound       = min;
+    info.gemmlowp_max_bound       = max;
+
+    auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel>();
+    k->configure(compile_context, input, bias, output, &info);
     _kernel = std::move(k);
 }
 
@@ -113,9 +160,16 @@ Status CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat::validate(const ITensorInf
 void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
                                                                     int result_fixedpoint_multiplier, int result_shift,
                                                                     int min, int max)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, min, max);
+}
+
+void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
+                                                                    int result_fixedpoint_multiplier, int result_shift,
+                                                                    int min, int max)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>();
-    k->configure(input, bias, output, result_fixedpoint_multiplier, result_shift, min, max);
+    k->configure(compile_context, input, bias, output, result_fixedpoint_multiplier, result_shift, min, max);
     _kernel = std::move(k);
 }
 
@@ -126,6 +180,11 @@ Status CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(const ITens
 }
 
 void CLGEMMLowpOutputStage::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, info);
+}
+
+void CLGEMMLowpOutputStage::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
@@ -138,14 +197,14 @@ void CLGEMMLowpOutputStage::configure(const ICLTensor *input, const ICLTensor *b
                 case DataType::QASYMM8:
                 {
                     auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel>();
-                    k->configure(input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
+                    k->configure(compile_context, input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
                     _kernel = std::move(k);
                     break;
                 }
                 case DataType::QASYMM8_SIGNED:
                 {
                     auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel>();
-                    k->configure(input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
+                    k->configure(compile_context, input, bias, output, info.gemmlowp_multiplier, info.gemmlowp_shift, info.gemmlowp_offset, info.gemmlowp_min_bound, info.gemmlowp_max_bound);
                     _kernel = std::move(k);
                     break;
                 }
@@ -164,14 +223,14 @@ void CLGEMMLowpOutputStage::configure(const ICLTensor *input, const ICLTensor *b
         case GEMMLowpOutputStageType::QUANTIZE_DOWN:
         {
             auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleKernel>();
-            k->configure(input, bias, output, &info);
+            k->configure(compile_context, input, bias, output, &info);
             _kernel = std::move(k);
             break;
         }
         case GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT:
         {
             auto k = arm_compute::support::cpp14::make_unique<CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel>();
-            k->configure(input, bias, output, &info);
+            k->configure(compile_context, input, bias, output, &info);
             _kernel = std::move(k);
             break;
         }
index 08ec4e02d1021c626b26872e6fca45d2caad12af..e2b18e0f55726ca7b32df88dcfd17026d5aed64a 100644 (file)
 namespace arm_compute
 {
 void CLGather::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, indices, output, axis);
+}
+
+void CLGather::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLGatherKernel>();
-    k->configure(input, indices, output, axis);
+    k->configure(compile_context, input, indices, output, axis);
     _kernel = std::move(k);
 }
 
index c187891b12e8db631032909263fb5dda06fce1cd..47367c4b171fad6bda0e48294f6ca6092ca913b9 100644 (file)
 using namespace arm_compute;
 
 void CLGaussian3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
+}
+
+void CLGaussian3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLGaussian3x3Kernel>();
-    k->configure(input, output, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
index ea803e4796e1e37e76cf524286427d0c526adb7d..6b82cd0c3510e6ebefe81d6ad96112c9246f9fe2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,6 +41,11 @@ CLGaussian5x5::CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager)
 }
 
 void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
+}
+
+void CLGaussian5x5::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
 
@@ -50,9 +55,9 @@ void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode bo
     _memory_group.manage(&_tmp);
 
     // Configure kernels
-    _kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED);
-    _kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED);
-    _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+    _kernel_hor.configure(compile_context, input, &_tmp, border_mode == BorderMode::UNDEFINED);
+    _kernel_vert.configure(compile_context, &_tmp, output, border_mode == BorderMode::UNDEFINED);
+    _border_handler.configure(compile_context, input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
 
     // Allocate intermediate buffers
     _tmp.allocator()->allocate();
index 3cda1be2dfe2d11091d911e13b34f64fb899be32..1ac98787ac0fc8ef42fdb9e0a1a2d6c9683ba0f0 100644 (file)
@@ -56,6 +56,11 @@ CLGaussianPyramidHalf::CLGaussianPyramidHalf() // NOLINT
 }
 
 void CLGaussianPyramidHalf::configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, pyramid, border_mode, constant_border_value);
+}
+
+void CLGaussianPyramidHalf::configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON(pyramid == nullptr);
@@ -90,16 +95,16 @@ void CLGaussianPyramidHalf::configure(ICLTensor *input, CLPyramid *pyramid, Bord
         for(size_t i = 0; i < num_levels - 1; ++i)
         {
             /* Configure horizontal kernel */
-            _horizontal_reduction[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
+            _horizontal_reduction[i].configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
 
             /* Configure vertical kernel */
-            _vertical_reduction[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
+            _vertical_reduction[i].configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
 
             /* Configure border */
-            _horizontal_border_handler[i].configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value));
+            _horizontal_border_handler[i].configure(compile_context, _pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value));
 
             /* Configure border */
-            _vertical_border_handler[i].configure(_tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16));
+            _vertical_border_handler[i].configure(compile_context, _tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16));
         }
         _tmp.allocate();
     }
@@ -136,6 +141,11 @@ CLGaussianPyramidOrb::CLGaussianPyramidOrb() // NOLINT
 }
 
 void CLGaussianPyramidOrb::configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, pyramid, border_mode, constant_border_value);
+}
+
+void CLGaussianPyramidOrb::configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
@@ -162,10 +172,10 @@ void CLGaussianPyramidOrb::configure(ICLTensor *input, CLPyramid *pyramid, Borde
         for(size_t i = 0; i < num_levels - 1; ++i)
         {
             /* Configure gaussian 5x5 */
-            _gauss5x5[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value);
+            _gauss5x5[i].configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value);
 
             /* Configure scale image kernel */
-            _scale_nearest[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, SamplingPolicy::CENTER);
+            _scale_nearest[i].configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, SamplingPolicy::CENTER);
         }
 
         _tmp.allocate();
index 7d16753320571f1e3deafa97e982cfcb27181151..7f037fc51f6b2221d4fe6b7fabf2063ed65c4dcc 100644 (file)
@@ -63,6 +63,13 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManage
 
 void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, ICLTensor *num_valid_proposals,
                                          const GenerateProposalsInfo &info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), scores, deltas, anchors, proposals, scores_out, num_valid_proposals, info);
+}
+
+void CLGenerateProposalsLayer::configure(const CLCompileContext &compile_context, const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals,
+                                         ICLTensor *scores_out,
+                                         ICLTensor *num_valid_proposals, const GenerateProposalsInfo &info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
     ARM_COMPUTE_ERROR_THROW_ON(CLGenerateProposalsLayer::validate(scores->info(), deltas->info(), anchors->info(), proposals->info(), scores_out->info(), num_valid_proposals->info(), info));
@@ -84,7 +91,7 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso
 
     // Compute all the anchors
     _memory_group.manage(&_all_anchors);
-    _compute_anchors_kernel.configure(anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
+    _compute_anchors_kernel.configure(compile_context, anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
 
     const TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors);
     _deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info()));
@@ -94,13 +101,13 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso
     if(!_is_nhwc)
     {
         _memory_group.manage(&_deltas_permuted);
-        _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
-        _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened);
+        _permute_deltas_kernel.configure(compile_context, deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
+        _flatten_deltas_kernel.configure(compile_context, &_deltas_permuted, &_deltas_flattened);
         _deltas_permuted.allocator()->allocate();
     }
     else
     {
-        _flatten_deltas_kernel.configure(deltas, &_deltas_flattened);
+        _flatten_deltas_kernel.configure(compile_context, deltas, &_deltas_flattened);
     }
 
     const TensorShape flatten_shape_scores(1, total_num_anchors);
@@ -111,13 +118,13 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso
     if(!_is_nhwc)
     {
         _memory_group.manage(&_scores_permuted);
-        _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
-        _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened);
+        _permute_scores_kernel.configure(compile_context, scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
+        _flatten_scores_kernel.configure(compile_context, &_scores_permuted, &_scores_flattened);
         _scores_permuted.allocator()->allocate();
     }
     else
     {
-        _flatten_scores_kernel.configure(scores, &_scores_flattened);
+        _flatten_scores_kernel.configure(compile_context, scores, &_scores_flattened);
     }
 
     CLTensor *anchors_to_use = &_all_anchors;
@@ -129,18 +136,18 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso
         _memory_group.manage(&_all_anchors_f32);
         _memory_group.manage(&_deltas_flattened_f32);
         // Dequantize anchors to float
-        _dequantize_anchors.configure(&_all_anchors, &_all_anchors_f32);
+        _dequantize_anchors.configure(compile_context, &_all_anchors, &_all_anchors_f32);
         _all_anchors.allocator()->allocate();
         anchors_to_use = &_all_anchors_f32;
         // Dequantize deltas to float
-        _dequantize_deltas.configure(&_deltas_flattened, &_deltas_flattened_f32);
+        _dequantize_deltas.configure(compile_context, &_deltas_flattened, &_deltas_flattened_f32);
         _deltas_flattened.allocator()->allocate();
         deltas_to_use = &_deltas_flattened_f32;
     }
     // Bounding box transform
     _memory_group.manage(&_all_proposals);
     BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f);
-    _bounding_box_kernel.configure(anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
+    _bounding_box_kernel.configure(compile_context, anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
     deltas_to_use->allocator()->allocate();
     anchors_to_use->allocator()->allocate();
 
@@ -150,7 +157,7 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso
         _memory_group.manage(&_all_proposals_quantized);
         // Requantize all_proposals to QASYMM16 with 0.125 scale and 0 offset
         _all_proposals_quantized.allocator()->init(TensorInfo(_all_proposals.info()->tensor_shape(), 1, DataType::QASYMM16, QuantizationInfo(0.125f, 0)));
-        _quantize_all_proposals.configure(&_all_proposals, &_all_proposals_quantized);
+        _quantize_all_proposals.configure(compile_context, &_all_proposals, &_all_proposals_quantized);
         _all_proposals.allocator()->allocate();
         _all_proposals_to_use = &_all_proposals_quantized;
     }
@@ -185,7 +192,7 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso
     _scores_flattened.allocator()->allocate();
 
     // Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images
-    _pad_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
+    _pad_kernel.configure(compile_context, &_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
     _proposals_4_roi_values.allocator()->allocate();
 }
 
index 09314439a8813959edeaeeb6213ef08888299f91..0645cfdf22bf7a9b9b8f439f39a8ecaf6bc37b1a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,6 +37,11 @@ CLHOGDescriptor::CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager)
 }
 
 void CLHOGDescriptor::configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, hog, border_mode, constant_border_value);
+}
+
+void CLHOGDescriptor::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON(nullptr == output);
@@ -76,16 +81,16 @@ void CLHOGDescriptor::configure(ICLTensor *input, ICLTensor *output, const IHOG
     _memory_group.manage(&_phase);
 
     // Initialise gradient kernel
-    _gradient.configure(input, &_mag, &_phase, hog_info->phase_type(), border_mode, constant_border_value);
+    _gradient.configure(compile_context, input, &_mag, &_phase, hog_info->phase_type(), border_mode, constant_border_value);
 
     // Manage intermediate buffers
     _memory_group.manage(&_hog_space);
 
     // Initialise orientation binning kernel
-    _orient_bin.configure(&_mag, &_phase, &_hog_space, hog->info());
+    _orient_bin.configure(compile_context, &_mag, &_phase, &_hog_space, hog->info());
 
     // Initialize HOG norm kernel
-    _block_norm.configure(&_hog_space, output, hog->info());
+    _block_norm.configure(compile_context, &_hog_space, output, hog->info());
 
     // Allocate intermediate tensors
     _mag.allocator()->allocate();
index 8eb5e4251f0ba17839435596de85a08b8d67f865..bf9bae1e8b91a67971e31635585d11bba6f7673f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,6 +37,12 @@ CLHOGDetector::CLHOGDetector()
 }
 
 void CLHOGDetector::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, detection_window_stride, threshold, idx_class);
+}
+
+void CLHOGDetector::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride,
+                              float threshold, size_t idx_class)
 {
     _detection_windows = detection_windows;
 
@@ -44,7 +50,7 @@ void CLHOGDetector::configure(const ICLTensor *input, const ICLHOG *hog, ICLDete
     _num_detection_windows = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(unsigned int));
 
     // Configure HOGDetectorKernel
-    _hog_detector_kernel.configure(input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class);
+    _hog_detector_kernel.configure(compile_context, input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class);
 }
 
 void CLHOGDetector::run()
index e509fd8e36f1cb88927545a82b9bd097fe4335a6..acf5f2c568880393ceada550bdf328afc86d3d58 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,6 +35,12 @@ CLHOGGradient::CLHOGGradient(std::shared_ptr<IMemoryManager> memory_manager)
 }
 
 void CLHOGGradient::configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output_magnitude, output_phase, phase_type, border_mode, constant_border_value);
+}
+
+void CLHOGGradient::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode,
+                              uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_magnitude, 1, DataType::S16);
@@ -52,16 +58,16 @@ void CLHOGGradient::configure(ICLTensor *input, ICLTensor *output_magnitude, ICL
     _memory_group.manage(&_gy);
 
     // Initialise derivate kernel
-    _derivative.configure(input, &_gx, &_gy, border_mode, constant_border_value);
+    _derivative.configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
 
     // Initialise magnitude/phase kernel
     if(PhaseType::UNSIGNED == phase_type)
     {
-        _mag_phase.configure(&_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED);
+        _mag_phase.configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED);
     }
     else
     {
-        _mag_phase.configure(&_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED);
+        _mag_phase.configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED);
     }
 
     // Allocate intermediate tensors
index 54ad1b35bf10925a8f2886d9180c170a6803cd0e..248f7307e6e8f583716395ef9b9889e8708404c9 100644 (file)
@@ -54,6 +54,14 @@ CLHOGMultiDetection::CLHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_
 
 void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
                                     uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, multi_hog, detection_windows, detection_window_strides, border_mode, constant_border_value, threshold, non_maxima_suppression,
+              min_distance);
+}
+
+void CLHOGMultiDetection::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows,
+                                    ICLSize2DArray *detection_window_strides, BorderMode border_mode,
+                                    uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(multi_hog);
@@ -145,7 +153,7 @@ void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_h
     _memory_group.manage(&_phase);
 
     // Initialise gradient kernel
-    _gradient_kernel.configure(input, &_mag, &_phase, phase_type, border_mode, constant_border_value);
+    _gradient_kernel.configure(compile_context, input, &_mag, &_phase, phase_type, border_mode, constant_border_value);
 
     // Configure NETensor for the HOG space and orientation binning kernel
     for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
@@ -173,7 +181,7 @@ void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_h
         _memory_group.manage(&_hog_space[i]);
 
         // Initialise orientation binning kernel
-        _orient_bin_kernel[i].configure(&_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info());
+        _orient_bin_kernel[i].configure(compile_context, &_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info());
     }
 
     // Allocate intermediate tensors
@@ -194,7 +202,7 @@ void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_h
         _memory_group.manage(&_hog_norm_space[i]);
 
         // Initialize block normalization kernel
-        _block_norm_kernel[i].configure(&_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info());
+        _block_norm_kernel[i].configure(compile_context, &_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info());
     }
 
     // Allocate intermediate tensors
@@ -210,7 +218,7 @@ void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_h
     {
         const size_t idx_block_norm = input_hog_detect[i];
 
-        _hog_detect_kernel[i].configure(&_hog_norm_space[idx_block_norm], multi_hog->cl_model(i), detection_windows, detection_window_strides->at(i), threshold, i);
+        _hog_detect_kernel[i].configure(compile_context, &_hog_norm_space[idx_block_norm], multi_hog->cl_model(i), detection_windows, detection_window_strides->at(i), threshold, i);
     }
 
     detection_window_strides->unmap(CLScheduler::get().queue());
index 3cae95f55119f0093266ed80180b96fc49039c2e..aecec0d3c53972ce477f62ae64559e3d096edc70 100644 (file)
@@ -64,6 +64,13 @@ CLHarrisCorners::CLHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager)
 void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist,
                                 float sensitivity, int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
                                 BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, threshold, min_dist, sensitivity, gradient_size, block_size, corners, border_mode, constant_border_value, use_fp16);
+}
+
+void CLHarrisCorners::configure(const CLCompileContext &compile_context, ICLImage *input, float threshold, float min_dist,
+                                float sensitivity, int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
+                                BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
 {
     ARM_COMPUTE_UNUSED(use_fp16); //TODO(COMPMID-772): Add half float support
     ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
@@ -96,21 +103,21 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist
         case 3:
         {
             auto k = arm_compute::support::cpp14::make_unique<CLSobel3x3>();
-            k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+            k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
             _sobel = std::move(k);
             break;
         }
         case 5:
         {
             auto k = arm_compute::support::cpp14::make_unique<CLSobel5x5>();
-            k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+            k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
             _sobel = std::move(k);
             break;
         }
         case 7:
         {
             auto k = arm_compute::support::cpp14::make_unique<CLSobel7x7>();
-            k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+            k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
             _sobel = std::move(k);
             break;
         }
@@ -126,11 +133,11 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist
     _memory_group.manage(&_score);
 
     // Set/init Harris Score kernel accordingly with block_size
-    _harris_score.configure(&_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
+    _harris_score.configure(compile_context, &_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
 
     // Configure border filling using harris score kernel's block size
-    _border_gx.configure(&_gx, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
-    _border_gy.configure(&_gy, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
+    _border_gx.configure(compile_context, &_gx, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
+    _border_gy.configure(compile_context, &_gy, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
 
     // Allocate intermediate buffers
     _gx.allocator()->allocate();
@@ -140,7 +147,7 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist
     _memory_group.manage(&_nonmax);
 
     // Init non-maxima suppression function
-    _non_max_suppr.configure(&_score, &_nonmax, border_mode);
+    _non_max_suppr.configure(compile_context, &_score, &_nonmax, border_mode);
 
     // Allocate intermediate buffers
     _score.allocator()->allocate();
index eb543387f51a07bb8bdc8c84705d2b10004c9019..e7230243345131ffdc0e2aaf7015fec959c833e3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,8 +34,13 @@ CLHistogram::CLHistogram()
 
 void CLHistogram::configure(const ICLImage *input, ICLDistribution1D *output)
 {
-    _kernel.configure(input, output);
-    _kernel_border.configure(input, output);
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLHistogram::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output)
+{
+    _kernel.configure(compile_context, input, output);
+    _kernel_border.configure(compile_context, input, output);
 }
 
 void CLHistogram::run()
index e639e743941894309033649de886b454be6a6b20..273a873c81e2b9e171964be22c1860cea360d708 100644 (file)
@@ -33,9 +33,14 @@ CLInstanceNormalizationLayer::CLInstanceNormalizationLayer()
 }
 
 void CLInstanceNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, float gamma, float beta, float epsilon, bool use_mixed_precision)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, gamma, beta, epsilon, use_mixed_precision);
+}
+
+void CLInstanceNormalizationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float gamma, float beta, float epsilon, bool use_mixed_precision)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLInstanceNormalizationLayerKernel>();
-    k->configure(input, output, InstanceNormalizationLayerKernelInfo(gamma, beta, epsilon, use_mixed_precision));
+    k->configure(compile_context, input, output, InstanceNormalizationLayerKernelInfo(gamma, beta, epsilon, use_mixed_precision));
     _kernel = std::move(k);
 }
 
index 2d54be32fa04804f7c7ea28e361bb8ffc1959cc5..b3be2f8c2cbf57cee92d41fbf0091007493eac99 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,8 +35,13 @@ CLIntegralImage::CLIntegralImage()
 
 void CLIntegralImage::configure(const ICLTensor *input, ICLTensor *output)
 {
-    _integral_hor.configure(input, output);
-    _integral_vert.configure(output);
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLIntegralImage::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
+{
+    _integral_hor.configure(compile_context, input, output);
+    _integral_vert.configure(compile_context, output);
 }
 
 void CLIntegralImage::run()
index 95a62c5317a22157e993485fde514f10597550dc..14c83cd5434a2eded10b207e710131a37a037260 100644 (file)
@@ -44,6 +44,11 @@ CLL2NormalizeLayer::CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_ma
 }
 
 void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, int axis, float epsilon)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, epsilon);
+}
+
+void CLL2NormalizeLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int axis, float epsilon)
 {
     // Reset auxiliary tensor
     _sumsq.allocator()->init(TensorInfo());
@@ -53,8 +58,8 @@ void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, int axis
 
     // Configure kernels
     const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim);
-    _reduce_func.configure(input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE);
-    _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
+    _reduce_func.configure(compile_context, input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE);
+    _normalize_kernel.configure(compile_context, input, &_sumsq, output, axis, epsilon);
 
     // Allocate intermediate tensor
     _sumsq.allocator()->allocate();
index 3a3917784b50946263fe7f8c082bb6b339289723..32ff813f435185c948d9434ff077df83fcc3c806 100644 (file)
@@ -58,6 +58,19 @@ void CLLSTMLayer::configure(const ICLTensor *input,
                             const ICLTensor *output_state_in, const ICLTensor *cell_state_in,
                             ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
                             const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold, float projection_threshold)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
+              recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, output_state_in, cell_state_in, scratch_buffer, output_state_out, cell_state_out, output, lstm_params, activation_info,
+              cell_threshold, projection_threshold);
+}
+
+void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                            const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                            const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                            const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                            const ICLTensor *output_state_in, const ICLTensor *cell_state_in,
+                            ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
+                            const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold, float projection_threshold)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input,
                                  input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
@@ -97,7 +110,7 @@ void CLLSTMLayer::configure(const ICLTensor *input,
     _forget_gate_out2.allocator()->init(TensorInfo(concat_shape, 1, input->info()->data_type()));
 
     _memory_group.manage(&_forget_gate_out2);
-    _concat_inputs_forget_gate.configure(input, output_state_in, &_forget_gate_out2);
+    _concat_inputs_forget_gate.configure(compile_context, input, output_state_in, &_forget_gate_out2);
 
     std::vector<const ICLTensor *> weights_vector;
 
@@ -106,10 +119,10 @@ void CLLSTMLayer::configure(const ICLTensor *input,
     const TensorShape weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(weights_vector, 0);
     _forget_gate_out6.allocator()->init(TensorInfo(weights_concat_shape, 1, input->info()->data_type()));
 
-    _concat_weights_forget_gate.configure(input_to_forget_weights, recurrent_to_forget_weights, &_forget_gate_out6);
+    _concat_weights_forget_gate.configure(compile_context, input_to_forget_weights, recurrent_to_forget_weights, &_forget_gate_out6);
 
     _memory_group.manage(&_forget_gate_out5);
-    _fully_connected_forget_gate.configure(&_forget_gate_out2, &_forget_gate_out6, (_is_layer_norm_lstm) ? nullptr : forget_gate_bias, &_forget_gate_out5);
+    _fully_connected_forget_gate.configure(compile_context, &_forget_gate_out2, &_forget_gate_out6, (_is_layer_norm_lstm) ? nullptr : forget_gate_bias, &_forget_gate_out5);
     _memory_group.manage(&_forget_gate_out1);
     _memory_group.manage(&_forget_gate_out3);
     _forget_gate_out6.allocator()->allocate();
@@ -121,8 +134,8 @@ void CLLSTMLayer::configure(const ICLTensor *input,
 
         _run_peephole_opt = true;
         _memory_group.manage(&_forget_gate_out4);
-        _pixelwise_mul_forget_gate.configure(cell_state_in, lstm_params.cell_to_forget_weights(), &_forget_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
-        _accum_forget_gate1.configure(&_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, ConvertPolicy::SATURATE);
+        _pixelwise_mul_forget_gate.configure(compile_context, cell_state_in, lstm_params.cell_to_forget_weights(), &_forget_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+        _accum_forget_gate1.configure(compile_context, &_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, ConvertPolicy::SATURATE);
         _forget_gate_out4.allocator()->allocate();
         _forget_gate_out5.allocator()->allocate();
         forget_gate_out = &_forget_gate_out3;
@@ -137,15 +150,16 @@ void CLLSTMLayer::configure(const ICLTensor *input,
         _forget_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
         _memory_group.manage(&_forget_layer_norm_out1);
         _memory_group.manage(&_forget_layer_norm_out2);
-        _mean_std_norm_forget_gate.configure(forget_gate_out);
-        _pixelwise_mul_forget_gate_coeff.configure(forget_gate_out, lstm_params.forget_layer_norm_weights(), &_forget_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+        _mean_std_norm_forget_gate.configure(compile_context, forget_gate_out);
+        _pixelwise_mul_forget_gate_coeff.configure(compile_context, forget_gate_out, lstm_params.forget_layer_norm_weights(), &_forget_layer_norm_out1, 1, ConvertPolicy::SATURATE,
+                                                   RoundingPolicy::TO_NEAREST_EVEN);
         // forget_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
         forget_gate_out->allocator()->allocate();
-        _accum_forget_gate_bias.configure(ArithmeticOperation::ADD, &_forget_layer_norm_out1, forget_gate_bias, &_forget_layer_norm_out2, ConvertPolicy::SATURATE);
+        _accum_forget_gate_bias.configure(compile_context, ArithmeticOperation::ADD, &_forget_layer_norm_out1, forget_gate_bias, &_forget_layer_norm_out2, ConvertPolicy::SATURATE);
         _forget_layer_norm_out1.allocator()->allocate();
         forget_gate_out = &_forget_layer_norm_out2;
     }
-    _activation_forget_gate.configure(forget_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _activation_forget_gate.configure(compile_context, forget_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
 
     // Configure block that calculates the input gate
     // input_gate = Activation(input * input_to_input_weights + output_state * recurrent_to_input_weights + PixelWiseMul(cell_state, cell_to_input_weights) + input_gate_bias), without CIFG
@@ -158,8 +172,8 @@ void CLLSTMLayer::configure(const ICLTensor *input,
     {
         _memory_group.manage(&_input_gate_out1);
         _ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
-        _ones_memset_kernel.configure(&_ones, PixelValue(1, _ones.info()->data_type()));
-        _subtract_input_gate.configure(ArithmeticOperation::SUB, &_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE);
+        _ones_memset_kernel.configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type()));
+        _subtract_input_gate.configure(compile_context, ArithmeticOperation::SUB, &_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE);
         _ones.allocator()->allocate();
         _run_cifg_opt = true;
     }
@@ -174,20 +188,20 @@ void CLLSTMLayer::configure(const ICLTensor *input,
         TensorShape lstm_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0);
         _input_gate_out2.allocator()->init(TensorInfo(lstm_weights_concat_shape, 1, input->info()->data_type()));
 
-        _concat_weights_input_gate.configure(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), &_input_gate_out2);
+        _concat_weights_input_gate.configure(compile_context, lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), &_input_gate_out2);
 
         _memory_group.manage(&_input_gate_out1);
 
         _memory_group.manage(&_input_gate_out3);
-        _fully_connected_input_gate.configure(&_forget_gate_out2, &_input_gate_out2, (_is_layer_norm_lstm) ? nullptr : lstm_params.input_gate_bias(), &_input_gate_out3);
+        _fully_connected_input_gate.configure(compile_context, &_forget_gate_out2, &_input_gate_out2, (_is_layer_norm_lstm) ? nullptr : lstm_params.input_gate_bias(), &_input_gate_out3);
         _input_gate_out2.allocator()->allocate();
 
         input_gate_out = &_input_gate_out3;
         if(_run_peephole_opt)
         {
             _memory_group.manage(&_input_gate_out4);
-            _pixelwise_mul_input_gate.configure(cell_state_in, lstm_params.cell_to_input_weights(), &_input_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
-            _accum_input_gate1.configure(&_input_gate_out3, &_input_gate_out4, &_input_gate_out1, ConvertPolicy::SATURATE);
+            _pixelwise_mul_input_gate.configure(compile_context, cell_state_in, lstm_params.cell_to_input_weights(), &_input_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+            _accum_input_gate1.configure(compile_context, &_input_gate_out3, &_input_gate_out4, &_input_gate_out1, ConvertPolicy::SATURATE);
             _input_gate_out3.allocator()->allocate();
             _input_gate_out4.allocator()->allocate();
             input_gate_out = &_input_gate_out1;
@@ -203,15 +217,16 @@ void CLLSTMLayer::configure(const ICLTensor *input,
             _input_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
             _memory_group.manage(&_input_layer_norm_out1);
             _memory_group.manage(&_input_layer_norm_out2);
-            _mean_std_norm_input_gate.configure(input_gate_out);
-            _pixelwise_mul_input_gate_coeff.configure(input_gate_out, lstm_params.input_layer_norm_weights(), &_input_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+            _mean_std_norm_input_gate.configure(compile_context, input_gate_out);
+            _pixelwise_mul_input_gate_coeff.configure(compile_context, input_gate_out, lstm_params.input_layer_norm_weights(), &_input_layer_norm_out1, 1, ConvertPolicy::SATURATE,
+                                                      RoundingPolicy::TO_NEAREST_EVEN);
             // input_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
             input_gate_out->allocator()->allocate();
-            _accum_input_gate_bias.configure(ArithmeticOperation::ADD, &_input_layer_norm_out1, lstm_params.input_gate_bias(), &_input_layer_norm_out2, ConvertPolicy::SATURATE);
+            _accum_input_gate_bias.configure(compile_context, ArithmeticOperation::ADD, &_input_layer_norm_out1, lstm_params.input_gate_bias(), &_input_layer_norm_out2, ConvertPolicy::SATURATE);
             _input_layer_norm_out1.allocator()->allocate();
             input_gate_out = &_input_layer_norm_out2;
         }
-        _activation_input_gate.configure(input_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+        _activation_input_gate.configure(compile_context, input_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
     }
 
     // Configure block that calculates the cell state
@@ -224,14 +239,14 @@ void CLLSTMLayer::configure(const ICLTensor *input,
     _cell_state_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
 
     _memory_group.manage(&_cell_state_out1);
-    _fully_connected_cell_state.configure(input, input_to_cell_weights, (_is_layer_norm_lstm) ? nullptr : cell_bias, &_cell_state_out1);
+    _fully_connected_cell_state.configure(compile_context, input, input_to_cell_weights, (_is_layer_norm_lstm) ? nullptr : cell_bias, &_cell_state_out1);
     _memory_group.manage(&_cell_state_out2);
-    _transpose_cell_state.configure(recurrent_to_cell_weights, &_cell_state_out2);
+    _transpose_cell_state.configure(compile_context, recurrent_to_cell_weights, &_cell_state_out2);
     _memory_group.manage(&_cell_state_out3);
-    _gemm_cell_state1.configure(output_state_in, &_cell_state_out2, nullptr, &_cell_state_out3, 1.f, 0.f);
+    _gemm_cell_state1.configure(compile_context, output_state_in, &_cell_state_out2, nullptr, &_cell_state_out3, 1.f, 0.f);
     _cell_state_out2.allocator()->allocate();
     _memory_group.manage(&_cell_state_out4);
-    _accum_cell_state1.configure(ArithmeticOperation::ADD, &_cell_state_out1, &_cell_state_out3, &_cell_state_out4, ConvertPolicy::SATURATE);
+    _accum_cell_state1.configure(compile_context, ArithmeticOperation::ADD, &_cell_state_out1, &_cell_state_out3, &_cell_state_out4, ConvertPolicy::SATURATE);
     CLTensor *cell_state_out_ptr = &_cell_state_out4;
     if(_is_layer_norm_lstm)
     {
@@ -239,27 +254,28 @@ void CLLSTMLayer::configure(const ICLTensor *input,
         _cell_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
         _memory_group.manage(&_cell_layer_norm_out1);
         _memory_group.manage(&_cell_layer_norm_out2);
-        _mean_std_norm_cell_gate.configure(cell_state_out_ptr);
-        _pixelwise_mul_cell_gate_coeff.configure(cell_state_out_ptr, lstm_params.cell_layer_norm_weights(), &_cell_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+        _mean_std_norm_cell_gate.configure(compile_context, cell_state_out_ptr);
+        _pixelwise_mul_cell_gate_coeff.configure(compile_context, cell_state_out_ptr, lstm_params.cell_layer_norm_weights(), &_cell_layer_norm_out1, 1, ConvertPolicy::SATURATE,
+                                                 RoundingPolicy::TO_NEAREST_EVEN);
         // cell_state_out_ptr is going to be reassigned, so allocate the tensor that it was assigned to before
         cell_state_out_ptr->allocator()->allocate();
-        _accum_cell_gate_bias.configure(ArithmeticOperation::ADD, &_cell_layer_norm_out1, cell_bias, &_cell_layer_norm_out2, ConvertPolicy::SATURATE);
+        _accum_cell_gate_bias.configure(compile_context, ArithmeticOperation::ADD, &_cell_layer_norm_out1, cell_bias, &_cell_layer_norm_out2, ConvertPolicy::SATURATE);
         _cell_layer_norm_out1.allocator()->allocate();
         cell_state_out_ptr = &_cell_layer_norm_out2;
     }
-    _activation_cell_state.configure(cell_state_out_ptr, nullptr, activation_info);
+    _activation_cell_state.configure(compile_context, cell_state_out_ptr, nullptr, activation_info);
     _memory_group.manage(&_cell_state_out5);
-    _pixelwise_mul_cell_state1.configure(cell_state_out_ptr, input_gate_out, &_cell_state_out5, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+    _pixelwise_mul_cell_state1.configure(compile_context, cell_state_out_ptr, input_gate_out, &_cell_state_out5, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
     cell_state_out_ptr->allocator()->allocate();
-    _pixelwise_mul_cell_state2.configure(forget_gate_out, cell_state_in, &_cell_state_out3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
-    _accum_cell_state2.configure(ArithmeticOperation::ADD, &_cell_state_out5, &_cell_state_out3, &_cell_state_out1, ConvertPolicy::SATURATE);
+    _pixelwise_mul_cell_state2.configure(compile_context, forget_gate_out, cell_state_in, &_cell_state_out3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+    _accum_cell_state2.configure(compile_context, ArithmeticOperation::ADD, &_cell_state_out5, &_cell_state_out3, &_cell_state_out1, ConvertPolicy::SATURATE);
     _cell_state_out3.allocator()->allocate();
     _cell_state_out5.allocator()->allocate();
     // Perform clipping
     if(cell_threshold != 0.f)
     {
         _perform_cell_clipping = true;
-        _cell_clip.configure(&_cell_state_out1, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, cell_threshold));
+        _cell_clip.configure(compile_context, &_cell_state_out1, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, cell_threshold));
     }
 
     // Configure block that calculates the output
@@ -274,12 +290,12 @@ void CLLSTMLayer::configure(const ICLTensor *input,
     TensorShape in_out_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0);
     _output2.allocator()->init(TensorInfo(in_out_weights_concat_shape, 1, input->info()->data_type()));
 
-    _concat_weights_output.configure(input_to_output_weights, recurrent_to_output_weights, &_output2);
+    _concat_weights_output.configure(compile_context, input_to_output_weights, recurrent_to_output_weights, &_output2);
 
     _memory_group.manage(&_output1);
     _memory_group.manage(&_output4);
 
-    _fully_connected_output.configure(&_forget_gate_out2, &_output2, (_is_layer_norm_lstm) ? nullptr : output_gate_bias, &_output4);
+    _fully_connected_output.configure(compile_context, &_forget_gate_out2, &_output2, (_is_layer_norm_lstm) ? nullptr : output_gate_bias, &_output4);
 
     _output2.allocator()->allocate();
     _forget_gate_out2.allocator()->allocate();
@@ -290,8 +306,8 @@ void CLLSTMLayer::configure(const ICLTensor *input,
         _output3.allocator()->init(TensorInfo(_cell_state_out1.info()->tensor_shape(), 1, input->info()->data_type()));
 
         _memory_group.manage(&_output3);
-        _pixelwise_mul_output_state1.configure(&_cell_state_out1, lstm_params.cell_to_output_weights(), &_output3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
-        _accum_output1.configure(&_output4, &_output3, &_output1, ConvertPolicy::SATURATE);
+        _pixelwise_mul_output_state1.configure(compile_context, &_cell_state_out1, lstm_params.cell_to_output_weights(), &_output3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+        _accum_output1.configure(compile_context, &_output4, &_output3, &_output1, ConvertPolicy::SATURATE);
         _output4.allocator()->allocate();
         output_gate_out = &_output1;
 
@@ -308,15 +324,16 @@ void CLLSTMLayer::configure(const ICLTensor *input,
         _output_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
         _memory_group.manage(&_output_layer_norm_out1);
         _memory_group.manage(&_output_layer_norm_out2);
-        _mean_std_norm_output_gate.configure(output_gate_out);
-        _pixelwise_mul_output_gate_coeff.configure(output_gate_out, lstm_params.output_layer_norm_weights(), &_output_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+        _mean_std_norm_output_gate.configure(compile_context, output_gate_out);
+        _pixelwise_mul_output_gate_coeff.configure(compile_context, output_gate_out, lstm_params.output_layer_norm_weights(), &_output_layer_norm_out1, 1, ConvertPolicy::SATURATE,
+                                                   RoundingPolicy::TO_NEAREST_EVEN);
         // output_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
         output_gate_out->allocator()->allocate();
-        _accum_output_gate_bias.configure(ArithmeticOperation::ADD, &_output_layer_norm_out1, output_gate_bias, &_output_layer_norm_out2, ConvertPolicy::SATURATE);
+        _accum_output_gate_bias.configure(compile_context, ArithmeticOperation::ADD, &_output_layer_norm_out1, output_gate_bias, &_output_layer_norm_out2, ConvertPolicy::SATURATE);
         _output_layer_norm_out1.allocator()->allocate();
         output_gate_out = &_output_layer_norm_out2;
     }
-    _activation_output.configure(output_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _activation_output.configure(compile_context, output_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
 
     // Configure block that calculates the output state
     /** lstm_res = PixelwiseMul(output, Activation(cell_state))
@@ -332,26 +349,26 @@ void CLLSTMLayer::configure(const ICLTensor *input,
     _output_state1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
 
     _memory_group.manage(&_cell_state_activation);
-    _activation_output_state.configure(&_cell_state_out1, &_cell_state_activation, activation_info);
-    _pixelwise_mul_output_state2.configure(&_cell_state_activation, output_gate_out, output_state_out_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
+    _activation_output_state.configure(compile_context, &_cell_state_out1, &_cell_state_activation, activation_info);
+    _pixelwise_mul_output_state2.configure(compile_context, &_cell_state_activation, output_gate_out, output_state_out_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
     _cell_state_activation.allocator()->allocate();
 
     if(lstm_params.has_projection())
     {
         _has_projection_weights = true;
-        _fully_connected_output_state.configure(output_state_out_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out);
+        _fully_connected_output_state.configure(compile_context, output_state_out_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out);
         _output_state1.allocator()->allocate();
         // Perform clipping
         if(projection_threshold != 0.f)
         {
             _perform_projection_clipping = true;
-            _projection_clip.configure(output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold));
+            _projection_clip.configure(compile_context, output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold));
         }
     }
 
     // Copy cell state and output
-    _copy_cell_state.configure(&_cell_state_out1, cell_state_out);
-    _copy_output.configure(output_state_out, output);
+    _copy_cell_state.configure(compile_context, &_cell_state_out1, cell_state_out);
+    _copy_output.configure(compile_context, output_state_out, output);
 
     // Vector for holding the tensors to store in scratch buffer
     std::vector<ICLTensor *> scratch_inputs;
@@ -362,7 +379,7 @@ void CLLSTMLayer::configure(const ICLTensor *input,
     scratch_inputs.emplace_back(&_cell_state_out1);
     scratch_inputs.emplace_back(forget_gate_out);
     scratch_inputs.emplace_back(output_gate_out);
-    _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer, Window::DimX);
+    _concat_scratch_buffer.configure(compile_context, scratch_inputs, scratch_buffer, Window::DimX);
     input_gate_out->allocator()->allocate();
     _cell_state_out1.allocator()->allocate();
     forget_gate_out->allocator()->allocate();
index e5f127825bfcdf23190a0d1f48f2fc03d8969cb6..c57fcc9f2137f3ab4746f9b217bc3b7f37885adc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,6 +61,18 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
                                      const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                                      ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                                      ICLTensor *cell_state_out, ICLTensor *output_state_out)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
+              recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out,
+              output_state_out);
+}
+
+void CLLSTMLayerQuantized::configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                                     const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                                     const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                                     const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                                     ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                                     ICLTensor *cell_state_out, ICLTensor *output_state_out)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
                                  recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
@@ -107,18 +119,18 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
     recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
 
     _input_weights.allocator()->init(TensorInfo(TensorShape(input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
-    _concat_input_weights.configure(inputs_weights_vector, &_input_weights, Window::DimY);
+    _concat_input_weights.configure(compile_context, inputs_weights_vector, &_input_weights, Window::DimY);
 
     _recurrent_weights.allocator()->init(TensorInfo(TensorShape(output_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
-    _concat_recurrent_weights.configure(recurrent_weights_vector, &_recurrent_weights, Window::DimY);
+    _concat_recurrent_weights.configure(compile_context, recurrent_weights_vector, &_recurrent_weights, Window::DimY);
 
     std::vector<const ICLTensor *> weights_vector;
     weights_vector.emplace_back(&_recurrent_weights);
     weights_vector.emplace_back(&_input_weights);
 
     _weights.allocator()->init(TensorInfo(TensorShape(output_size + input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
-    _concat_weights.configure(weights_vector, &_weights, Window::DimX);
-    _transpose_weights.configure(&_weights, &_weights_transposed);
+    _concat_weights.configure(compile_context, weights_vector, &_weights, Window::DimX);
+    _transpose_weights.configure(compile_context, &_weights, &_weights_transposed);
 
     // Input concatenation
     std::vector<const ICLTensor *> input_vector;
@@ -127,7 +139,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
 
     _memory_group.manage(&_input);
     _input.allocator()->init(TensorInfo(TensorShape(output_size + input_size, batch_size), 1, DataType::QASYMM8, qasymm));
-    _concat_inputs.configure(input_vector, &_input, Window::DimX);
+    _concat_inputs.configure(compile_context, input_vector, &_input, Window::DimX);
 
     // Bias concatenation
     std::vector<const ICLTensor *> bias_vector;
@@ -137,7 +149,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
     bias_vector.emplace_back(output_gate_bias);
 
     _bias.allocator()->init(TensorInfo(TensorShape(4 * output_size), 1, DataType::S32));
-    _concat_bias.configure(bias_vector, &_bias, Window::DimX);
+    _concat_bias.configure(compile_context, bias_vector, &_bias, Window::DimX);
 
     // Invert the offset for gemmlowp
     _input.info()->set_quantization_info(QuantizationInfo(qasymm.uniform().scale, -qasymm.uniform().offset));
@@ -146,7 +158,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
     // Run gemmlowp
     _memory_group.manage(&_output_highp);
     _output_highp.allocator()->init(TensorInfo(TensorShape(4 * output_size, batch_size), 1, DataType::S32));
-    _gemmlowp.configure(&_input, &_weights_transposed, nullptr, &_output_highp);
+    _gemmlowp.configure(compile_context, &_input, &_weights_transposed, nullptr, &_output_highp);
     _input.allocator()->allocate();
 
     // Set the offset back
@@ -162,7 +174,7 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
     quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
 
     _memory_group.manage(&_output_lowp);
-    _output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
+    _output_stage.configure(compile_context, &_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
     _output_highp.allocator()->allocate();
     _bias.allocator()->allocate();
 
@@ -170,86 +182,86 @@ void CLLSTMLayerQuantized::configure(const ICLTensor *input,
     if(batch_size > 1)
     {
         _memory_group.manage(&_input_gate_input);
-        _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size });
+        _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size });
         _memory_group.manage(&_forget_gate_input);
-        _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size });
+        _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size });
         _memory_group.manage(&_input_modulation_gate_input);
-        _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size });
+        _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size });
         _memory_group.manage(&_output_gate_input);
-        _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size });
+        _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size });
         _output_lowp.allocator()->allocate();
     }
     else
     {
         _memory_group.manage(&_input_gate_input);
-        _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, { 0 }, { output_size });
+        _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0 }, { output_size });
         _memory_group.manage(&_forget_gate_input);
-        _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size });
+        _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size });
         _memory_group.manage(&_input_modulation_gate_input);
-        _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size });
+        _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size });
         _memory_group.manage(&_output_gate_input);
-        _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size });
+        _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size });
         _output_lowp.allocator()->allocate();
     }
 
     // Forget gate
     _memory_group.manage(&_forget_gate_output);
     _forget_gate_output.allocator()->init(TensorInfo(_forget_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _sigmoid_forget_gate.configure(&_forget_gate_input, &_forget_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _sigmoid_forget_gate.configure(compile_context, &_forget_gate_input, &_forget_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
     _forget_gate_input.allocator()->allocate();
 
     // Input gate
     _memory_group.manage(&_input_gate_output);
     _input_gate_output.allocator()->init(TensorInfo(_input_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _sigmoid_input_gate.configure(&_input_gate_input, &_input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _sigmoid_input_gate.configure(compile_context, &_input_gate_input, &_input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
     _input_gate_input.allocator()->allocate();
 
     // Input modulation gate equation
     _memory_group.manage(&_input_modulation_gate_output);
     _input_modulation_gate_output.allocator()->init(TensorInfo(_input_modulation_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _tanh_modulation_gate.configure(&_input_modulation_gate_input, &_input_modulation_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
+    _tanh_modulation_gate.configure(compile_context, &_input_modulation_gate_input, &_input_modulation_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
     _input_modulation_gate_input.allocator()->allocate();
 
     // Output gate
     _memory_group.manage(&_output_gate_output);
     _output_gate_output.allocator()->init(TensorInfo(_output_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _sigmoid_output_gate.configure(&_output_gate_input, &_output_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _sigmoid_output_gate.configure(compile_context, &_output_gate_input, &_output_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
     _output_gate_input.allocator()->allocate();
 
     // Long term memory
     _memory_group.manage(&_cell_state_tmp1);
     _cell_state_tmp1.allocator()->init(TensorInfo(_forget_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
-    _mul_forget_gate_cell_state.configure(&_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _mul_forget_gate_cell_state.configure(compile_context, &_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
     _forget_gate_output.allocator()->allocate();
 
     _memory_group.manage(&_cell_state_tmp2);
     _cell_state_tmp2.allocator()->init(TensorInfo(_input_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
-    _mul_input_gate_input_mod_gate.configure(&_input_gate_output, &_input_modulation_gate_output, &_cell_state_tmp2, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _mul_input_gate_input_mod_gate.configure(compile_context, &_input_gate_output, &_input_modulation_gate_output, &_cell_state_tmp2, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
     _input_modulation_gate_output.allocator()->allocate();
     _input_gate_output.allocator()->allocate();
 
-    _add_cell_state_tmps.configure(&_cell_state_tmp1, &_cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE);
+    _add_cell_state_tmps.configure(compile_context, &_cell_state_tmp1, &_cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE);
     _cell_state_tmp1.allocator()->allocate();
     _cell_state_tmp2.allocator()->allocate();
 
     // Short term memory
     _memory_group.manage(&_output_state_tmp);
     _output_state_tmp.allocator()->init(TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _tanh_output_state.configure(cell_state_out, &_output_state_tmp, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
+    _tanh_output_state.configure(compile_context, cell_state_out, &_output_state_tmp, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
 
     _memory_group.manage(&_output_state_out_symm);
     _output_state_out_symm.allocator()->init(TensorInfo(_output_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _mul_output_state_tmp_output_gate.configure(&_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _mul_output_state_tmp_output_gate.configure(compile_context, &_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
     _output_gate_output.allocator()->allocate();
     _output_state_tmp.allocator()->allocate();
 
     // Requantize the output state from QSYMM16 to QASYMM8
     _memory_group.manage(&_output_state_out_f32);
     _output_state_out_f32.allocator()->init(TensorInfo(_output_state_out_symm.info()->tensor_shape(), 1, DataType::F32));
-    _dequantize.configure(&_output_state_out_symm, &_output_state_out_f32);
+    _dequantize.configure(compile_context, &_output_state_out_symm, &_output_state_out_f32);
     _output_state_out_symm.allocator()->allocate();
 
-    _quantize.configure(&_output_state_out_f32, output_state_out);
+    _quantize.configure(compile_context, &_output_state_out_f32, output_state_out);
     _output_state_out_f32.allocator()->allocate();
 }
 
index c2c04e6002907fe42cc8b4ba8b586217128150ce..831f0cdcdf1427de97b286b5969f9fb6c5b6b9b7 100644 (file)
@@ -47,6 +47,11 @@ CLLaplacianPyramid::CLLaplacianPyramid() // NOLINT
 }
 
 void CLLaplacianPyramid::configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, pyramid, output, border_mode, constant_border_value);
+}
+
+void CLLaplacianPyramid::configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
@@ -67,18 +72,18 @@ void CLLaplacianPyramid::configure(ICLTensor *input, CLPyramid *pyramid, ICLTens
     _conv_pyr.init(pyramid_info);
 
     // Create Gaussian Pyramid function
-    _gaussian_pyr_function.configure(input, &_gauss_pyr, border_mode, constant_border_value);
+    _gaussian_pyr_function.configure(compile_context, input, &_gauss_pyr, border_mode, constant_border_value);
 
     _convf.resize(_num_levels);
     _subf.resize(_num_levels);
 
     for(unsigned int i = 0; i < _num_levels; ++i)
     {
-        _convf[i].configure(_gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), border_mode, constant_border_value);
-        _subf[i].configure(_gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), pyramid->get_pyramid_level(i), ConvertPolicy::WRAP);
+        _convf[i].configure(compile_context, _gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), border_mode, constant_border_value);
+        _subf[i].configure(compile_context, _gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), pyramid->get_pyramid_level(i), ConvertPolicy::WRAP);
     }
 
-    _depth_function.configure(_conv_pyr.get_pyramid_level(_num_levels - 1), output, ConvertPolicy::WRAP, 0);
+    _depth_function.configure(compile_context, _conv_pyr.get_pyramid_level(_num_levels - 1), output, ConvertPolicy::WRAP, 0);
 
     _gauss_pyr.allocate();
     _conv_pyr.allocate();
index 8d1cd98c91dcb44a2ae7f73246af00dc2155e6b3..ea6a3f9a980e1615f03cf549f5616c3ffd40831b 100644 (file)
@@ -42,6 +42,11 @@ CLLaplacianReconstruct::CLLaplacianReconstruct() // NOLINT
 }
 
 void CLLaplacianReconstruct::configure(const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), pyramid, input, output, border_mode, constant_border_value);
+}
+
+void CLLaplacianReconstruct::configure(const CLCompileContext &compile_context, const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
     ARM_COMPUTE_ERROR_ON(input == output);
@@ -67,17 +72,17 @@ void CLLaplacianReconstruct::configure(const CLPyramid *pyramid, ICLTensor *inpu
 
     const size_t last_level = num_levels - 1;
 
-    _addf[last_level].configure(input, pyramid->get_pyramid_level(last_level), _tmp_pyr.get_pyramid_level(last_level), ConvertPolicy::SATURATE);
+    _addf[last_level].configure(compile_context, input, pyramid->get_pyramid_level(last_level), _tmp_pyr.get_pyramid_level(last_level), ConvertPolicy::SATURATE);
 
     // Scale levels n-1 to 1, and add levels n-2 to 0
     for(size_t l = 0; l < last_level; ++l)
     {
-        _scalef[l].configure(_tmp_pyr.get_pyramid_level(l + 1), _tmp_pyr.get_pyramid_level(l), arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value);
-        _addf[l].configure(_tmp_pyr.get_pyramid_level(l), pyramid->get_pyramid_level(l), _tmp_pyr.get_pyramid_level(l), ConvertPolicy::SATURATE);
+        _scalef[l].configure(compile_context, _tmp_pyr.get_pyramid_level(l + 1), _tmp_pyr.get_pyramid_level(l), arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value);
+        _addf[l].configure(compile_context, _tmp_pyr.get_pyramid_level(l), pyramid->get_pyramid_level(l), _tmp_pyr.get_pyramid_level(l), ConvertPolicy::SATURATE);
     }
 
     // Convert level 0 from S16 to U8
-    _depthf.configure(_tmp_pyr.get_pyramid_level(0), output, ConvertPolicy::SATURATE, 0);
+    _depthf.configure(compile_context, _tmp_pyr.get_pyramid_level(0), output, ConvertPolicy::SATURATE, 0);
 
     _tmp_pyr.allocate();
 }
index 3e99dde253f31109c1939940e7048522ae4678ba..950be5030f34fc5dcba1b882742f6b824c00eeb6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -128,6 +128,12 @@ Status CLLocallyConnectedLayer::validate(const ITensorInfo *input, const ITensor
 }
 
 void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info);
+}
+
+void CLLocallyConnectedLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                                        const PadStrideInfo &conv_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLLocallyConnectedLayer::validate(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), conv_info));
@@ -160,10 +166,10 @@ void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor
     _memory_group.manage(&_gemm_output);
 
     // Configure kernels
-    _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
-    _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);
-    _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
-    _output_col2im_kernel.configure(&_gemm_output, output, Size2D(conv_w, conv_h));
+    _input_im2col_kernel.configure(compile_context, input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
+    _weights_reshape_kernel.configure(compile_context, weights, biases, &_weights_reshaped);
+    _mm_kernel.configure(compile_context, &_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
+    _output_col2im_kernel.configure(compile_context, &_gemm_output, output, Size2D(conv_w, conv_h));
 
     // Allocate intermediate tensors
     _input_im2col_reshaped.allocator()->allocate();
index 9ca9d167ebb56c2ef8021b803a2967c0e52fda09..a267952d4a3b34d752f180ea004ae992832814c3 100644 (file)
 using namespace arm_compute;
 
 void CLMagnitude::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, mag_type);
+}
+
+void CLMagnitude::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLMagnitudePhaseKernel>();
-    k->configure(input1, input2, output, nullptr, mag_type);
+    k->configure(compile_context, input1, input2, output, nullptr, mag_type);
     _kernel = std::move(k);
 }
index 8517b59e7a0bff2254839a72ca0f8c85c933e368..e3ce704bfb3b7e4604be2b32b2580607ec9c23f4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,11 @@ Status CLMeanStdDev::validate(ITensorInfo *input, float *mean, float *stddev)
 }
 
 void CLMeanStdDev::configure(ICLImage *input, float *mean, float *stddev)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, mean, stddev);
+}
+
+void CLMeanStdDev::configure(const CLCompileContext &compile_context, ICLImage *input, float *mean, float *stddev)
 {
     // In the case of F16/F32 we call reduction operation for calculating CLMeanStdDev
     _data_type = input->info()->data_type();
@@ -74,14 +79,14 @@ void CLMeanStdDev::configure(ICLImage *input, float *mean, float *stddev)
         _num_pixels = input->info()->dimension(0) * input->info()->dimension(1);
 
         _memory_group.manage(&_reduction_output_mean);
-        _reduction_operation_mean.configure(input, &_reduction_output_mean, 0, ReductionOperation::SUM);
+        _reduction_operation_mean.configure(compile_context, input, &_reduction_output_mean, 0, ReductionOperation::SUM);
         _reduction_output_mean.allocator()->allocate();
         _mean = mean;
 
         if(stddev != nullptr)
         {
             _memory_group.manage(&_reduction_output_stddev);
-            _reduction_operation_stddev.configure(input, &_reduction_output_stddev, 0, ReductionOperation::SUM_SQUARE);
+            _reduction_operation_stddev.configure(compile_context, input, &_reduction_output_stddev, 0, ReductionOperation::SUM_SQUARE);
             _reduction_output_stddev.allocator()->allocate();
             _stddev     = stddev;
             _run_stddev = true;
@@ -96,8 +101,8 @@ void CLMeanStdDev::configure(ICLImage *input, float *mean, float *stddev)
             _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
         }
 
-        _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared);
-        _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
+        _mean_stddev_kernel.configure(compile_context, input, mean, &_global_sum, stddev, &_global_sum_squared);
+        _fill_border_kernel.configure(compile_context, input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
     }
 }
 
index 9b5e707665893736e18f762c7a0bff13c3a6bbc4..3dbab76c726badfc6e336c905ab1b5c6b2fbd9bc 100644 (file)
 namespace arm_compute
 {
 void CLMeanStdDevNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, float epsilon)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, epsilon);
+}
+
+void CLMeanStdDevNormalizationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float epsilon)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLMeanStdDevNormalizationKernel>();
-    k->configure(input, output, epsilon);
+    k->configure(compile_context, input, output, epsilon);
     _kernel = std::move(k);
 }
 
index 7c5c422822187542bbd153b379decaa2e6df0a86..dc53240f79e4ed99e83a930a851ef8468a192558 100644 (file)
 using namespace arm_compute;
 
 void CLMedian3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
+}
+
+void CLMedian3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLMedian3x3Kernel>();
-    k->configure(input, output, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
index 49dcbcb7dfc5d739ef3afaf7a609f4abed734176..15b28330b5d9f02a94e6dadcfb8321741fd4e3ff 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,13 @@ CLMinMaxLocation::CLMinMaxLocation()
 }
 
 void CLMinMaxLocation::configure(const ICLImage *input, void *min, void *max, CLCoordinates2DArray *min_loc, CLCoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, min, max, min_loc, max_loc, min_count, max_count);
+}
+
+void CLMinMaxLocation::configure(const CLCompileContext &compile_context, const ICLImage *input, void *min, void *max, CLCoordinates2DArray *min_loc, CLCoordinates2DArray *max_loc,
+                                 uint32_t *min_count,
+                                 uint32_t *max_count)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == min);
     ARM_COMPUTE_ERROR_ON(nullptr == max);
@@ -55,8 +62,8 @@ void CLMinMaxLocation::configure(const ICLImage *input, void *min, void *max, CL
     _min_loc            = min_loc;
     _max_loc            = max_loc;
 
-    _min_max_kernel.configure(input, &_min_max_vals);
-    _min_max_loc_kernel.configure(input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc);
+    _min_max_kernel.configure(compile_context, input, &_min_max_vals);
+    _min_max_loc_kernel.configure(compile_context, input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc);
 }
 
 void CLMinMaxLocation::run()
index 843a756feaf5d20f966b7ac8026dc5ef1f938cf2..96912a21cddbbbc2ae6d927ea4b4b8741bdaa36f 100644 (file)
@@ -32,9 +32,15 @@ using namespace arm_compute;
 
 void CLNonLinearFilter::configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
                                   BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, function, mask_size, pattern, mask, border_mode, constant_border_value);
+}
+
+void CLNonLinearFilter::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern,
+                                  const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLNonLinearFilterKernel>();
-    k->configure(input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
index 2f9c02dbb6d225198fced73a2468b250c46c1235..6d4a28db26bfe80d3110f3c4e14e55e4fa107b44 100644 (file)
 using namespace arm_compute;
 
 void CLNonMaximaSuppression3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode);
+}
+
+void CLNonMaximaSuppression3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLNonMaximaSuppression3x3Kernel>();
-    k->configure(input, output, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
 
     if(border_mode != BorderMode::UNDEFINED)
     {
-        _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT);
+        _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT);
     }
     else
     {
-        _border_handler.configure(input, _kernel->border_size(), BorderMode::UNDEFINED);
+        _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::UNDEFINED);
     }
 }
index 8489fab68bd04020b5c5e34d0013552da51962d2..f59a4ca959ac703c990f0adc316fd90d3ccd0882 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,14 +38,19 @@ CLNormalizationLayer::CLNormalizationLayer()
 }
 
 void CLNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, norm_info);
+}
+
+void CLNormalizationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info)
 {
     ARM_COMPUTE_ERROR_ON(input == nullptr);
 
     // Configure normalization kernel
-    _norm_kernel.configure(input, output, norm_info);
+    _norm_kernel.configure(compile_context, input, output, norm_info);
 
     // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel
-    _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue());
+    _border_handler.configure(compile_context, input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue());
 }
 
 Status CLNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info)
index 33b993a003328872fe4a672e0947aaae94bfcd0f..b03de6475b35178cdf913532b762770272e06082 100644 (file)
 namespace arm_compute
 {
 void CLNormalizePlanarYUVLayer::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, std);
+}
+
+void CLNormalizePlanarYUVLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLNormalizePlanarYUVLayerKernel>();
-    k->configure(input, output, mean, std);
+    k->configure(compile_context, input, output, mean, std);
     _kernel = std::move(k);
 }
 
index 5e49937b5fe6dfbd9f8c63e02d3e114ee0dad6d1..5f7c1704ee9b757b24db7236218bd81bb40a7be2 100644 (file)
@@ -61,6 +61,15 @@ void CLOpticalFlow::configure(const CLPyramid *old_pyramid, const CLPyramid *new
                               const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
                               Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
                               BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), old_pyramid, new_pyramid, old_points, new_points_estimates, new_points, termination, epsilon, num_iterations, window_dimension,
+              use_initial_estimate, border_mode, constant_border_value);
+}
+
+void CLOpticalFlow::configure(const CLCompileContext &compile_context, const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
+                              const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
+                              Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
+                              BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == old_pyramid);
     ARM_COMPUTE_ERROR_ON(nullptr == new_pyramid);
@@ -122,18 +131,18 @@ void CLOpticalFlow::configure(const CLPyramid *old_pyramid, const CLPyramid *new
         _memory_group.manage(&_scharr_gy[i]);
 
         // Init Scharr kernel
-        _func_scharr[i].configure(old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value);
+        _func_scharr[i].configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value);
 
         // Init Lucas-Kanade init kernel
-        _tracker_init_kernel[i].configure(old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale);
+        _tracker_init_kernel[i].configure(compile_context, old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale);
 
         // Init Lucas-Kanade stage0 kernel
-        _tracker_stage0_kernel[i].configure(old_ith_input, &_scharr_gx[i], &_scharr_gy[i],
+        _tracker_stage0_kernel[i].configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i],
                                             _old_points_internal.get(), _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
                                             window_dimension, i);
 
         // Init Lucas-Kanade stage1 kernel
-        _tracker_stage1_kernel[i].configure(new_ith_input, _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
+        _tracker_stage1_kernel[i].configure(compile_context, new_ith_input, _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
                                             termination, epsilon, num_iterations, window_dimension, i);
 
         // Allocate intermediate buffers
@@ -142,7 +151,7 @@ void CLOpticalFlow::configure(const CLPyramid *old_pyramid, const CLPyramid *new
     }
 
     // Finalize Lucas-Kanade
-    _tracker_finalize_kernel.configure(_new_points_internal.get(), new_points);
+    _tracker_finalize_kernel.configure(compile_context, _new_points_internal.get(), new_points);
 }
 
 void CLOpticalFlow::run()
index ab4f53d960225198592f4236fdddc4b1a16e890b..6543ab922e8f4a9cba6db062755717dcd271a8d1 100644 (file)
@@ -31,7 +31,7 @@ namespace arm_compute
 {
 namespace
 {
-void configure_border_handler(CLFillBorderKernel &border_handler, BorderSize border_size, ICLTensor *input1, ICLTensor *input2, const ICLTensor *output)
+void configure_border_handler(const CLCompileContext &compile_context, CLFillBorderKernel &border_handler, BorderSize border_size, ICLTensor *input1, ICLTensor *input2, const ICLTensor *output)
 {
     if(output->info()->dimension(0) > 1)
     {
@@ -39,18 +39,23 @@ void configure_border_handler(CLFillBorderKernel &border_handler, BorderSize bor
 
         if(broadcasted_info->info()->dimension(0) == 1)
         {
-            border_handler.configure(broadcasted_info, border_size, BorderMode::REPLICATE);
+            border_handler.configure(compile_context, broadcasted_info, border_size, BorderMode::REPLICATE);
         }
     }
 }
 } // namespace
 
 void CLPReluLayer::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, alpha, output);
+}
+
+void CLPReluLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *alpha, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLArithmeticOperationKernel>();
-    k->configure(ArithmeticOperation::PRELU, input, alpha, output);
+    k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, output);
     _kernel = std::move(k);
-    configure_border_handler(_border_handler, _kernel->border_size(), input, alpha, output);
+    configure_border_handler(compile_context, _border_handler, _kernel->border_size(), input, alpha, output);
 }
 
 Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
index 8f36a69866613d3bde8ee99c4077b9be8394d7f8..078bdbc51fb61447abb39c3f35a847c7ba90b0e5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,6 +31,11 @@ CLPadLayer::CLPadLayer()
 }
 
 void CLPadLayer::configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value, mode);
+}
+
+void CLPadLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
 {
     ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), padding, constant_value, mode));
 
@@ -41,12 +46,12 @@ void CLPadLayer::configure(ICLTensor *input, ICLTensor *output, const PaddingLis
 
     if(_perform_pad)
     {
-        _pad_kernel.configure(input, output, padding, constant_value, mode);
+        _pad_kernel.configure(compile_context, input, output, padding, constant_value, mode);
     }
     else
     {
         // Copy the input to the whole output if no padding is applied
-        _copy_kernel.configure(input, output);
+        _copy_kernel.configure(compile_context, input, output);
     }
 }
 Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
index 6b88ef86acca871cda436476c6d213bb86429e35..e6323ce504a6bfc89015bf2739327edc1cb48a71 100644 (file)
 namespace arm_compute
 {
 void CLPermute::configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, perm);
+}
+
+void CLPermute::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLPermuteKernel>();
-    k->configure(input, output, perm);
+    k->configure(compile_context, input, output, perm);
     _kernel = std::move(k);
 }
 
index 537dda02f433abc9cfcac82d57444dbec0dbf261..b915104f38903f76e0e79d9b89bfb9783c5410f2 100644 (file)
 using namespace arm_compute;
 
 void CLPhase::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, phase_type);
+}
+
+void CLPhase::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLMagnitudePhaseKernel>();
-    k->configure(input1, input2, nullptr, output, MagnitudeType::L1NORM, phase_type);
+    k->configure(compile_context, input1, input2, nullptr, output, MagnitudeType::L1NORM, phase_type);
     _kernel = std::move(k);
 }
index b527922d2b409767d294c87b9e2340af775148e7..3c1a7de76d0ff733fa8d2d857e54bd371d68244a 100644 (file)
@@ -33,9 +33,15 @@ namespace arm_compute
 {
 void CLPixelWiseMultiplication::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale,
                                           ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, scale, overflow_policy, rounding_policy, act_info);
+}
+
+void CLPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale,
+                                          ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLPixelWiseMultiplicationKernel>();
-    k->configure(input1, input2, output, scale, overflow_policy, rounding_policy, act_info);
+    k->configure(compile_context, input1, input2, output, scale, overflow_policy, rounding_policy, act_info);
     _kernel = std::move(k);
 
     if(output->info()->dimension(0) > 1)
@@ -44,7 +50,7 @@ void CLPixelWiseMultiplication::configure(ICLTensor *input1, ICLTensor *input2,
 
         if(broadcasted_info->info()->dimension(0) == 1)
         {
-            _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+            _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
         }
     }
 }
@@ -56,9 +62,14 @@ Status CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITen
 }
 
 void CLComplexPixelWiseMultiplication::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info);
+}
+
+void CLComplexPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLComplexPixelWiseMultiplicationKernel>();
-    k->configure(input1, input2, output, act_info);
+    k->configure(compile_context, input1, input2, output, act_info);
     _kernel = std::move(k);
 
     if(output->info()->dimension(0) > 1)
@@ -67,7 +78,7 @@ void CLComplexPixelWiseMultiplication::configure(ICLTensor *input1, ICLTensor *i
 
         if(broadcasted_info->info()->dimension(0) == 1)
         {
-            _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+            _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
         }
     }
 }
index 9c4fa4a2bafccf1be1e8fec851f257b6caf9de77..e7735b00df4020645a3a895f7230758860ddaad3 100644 (file)
 namespace arm_compute
 {
 void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, pool_info, indices);
+}
+
+void CLPoolingLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input);
     // Configure pooling kernel
     auto k = arm_compute::support::cpp14::make_unique<CLPoolingLayerKernel>();
     k->set_target(CLScheduler::get().target());
-    k->configure(input, output, pool_info, indices);
+    k->configure(compile_context, input, output, pool_info, indices);
     _kernel = std::move(k);
 
     const DataType data_type = input->info()->data_type();
@@ -74,7 +79,7 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
         default:
             ARM_COMPUTE_ERROR("Data layout not supported");
     }
-    _border_handler.configure(input, _kernel->border_size(), border_mode, pixel_value);
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, pixel_value);
 
     // Tune kernels
     CLScheduler::get().tune_kernel_static(*_kernel);
index 4f6c969a921dfe13bbf4cf57b2afe148de752238..d01b4c711bb7daee3d8df89770eaf3a4f0b1b29d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,6 +39,11 @@ CLPriorBoxLayer::CLPriorBoxLayer()
 }
 
 void CLPriorBoxLayer::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, info);
+}
+
+void CLPriorBoxLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info)
 {
     _min           = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, info.min_sizes().size() * sizeof(float));
     _aspect_ratios = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, info.aspect_ratios().size() * sizeof(float));
@@ -48,7 +53,7 @@ void CLPriorBoxLayer::configure(const ICLTensor *input1, const ICLTensor *input2
     }
 
     auto k = arm_compute::support::cpp14::make_unique<CLPriorBoxLayerKernel>();
-    k->configure(input1, input2, output, info, &_min, &_max, &_aspect_ratios);
+    k->configure(compile_context, input1, input2, output, info, &_min, &_max, &_aspect_ratios);
     _kernel = std::move(k);
 }
 
index 4b994d47b2b011574ab8446ec48cee5af179f2ac..88c5f77b9f528f93278425d8d278d125ee8802e1 100644 (file)
@@ -51,7 +51,7 @@ CLQLSTMLayer::CLQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
     _memory_group = MemoryGroup(std::move(memory_manager));
 }
 
-void CLQLSTMLayer::configure_mm(CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
+void CLQLSTMLayer::configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
                                 const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias,
                                 CLTensor *mm_res, CLTensor *outstage_res, float gemmlowp_scale,
                                 const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info)
@@ -63,11 +63,11 @@ void CLQLSTMLayer::configure_mm(CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutp
     outstage_res->allocator()->init(outstage_tensor_info);
 
     // Configure matrix-multiplication
-    mm.configure(mm_input, mm_weights, nullptr, mm_res);
+    mm.configure(compile_context, mm_input, mm_weights, nullptr, mm_res);
 
     // Configure output stage
     quantization::calculate_quantized_multiplier(gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
-    outstage.configure(mm_res, bias, outstage_res, gemmlowp_info);
+    outstage.configure(compile_context, mm_res, bias, outstage_res, gemmlowp_info);
     mm_res->allocator()->allocate();
 }
 
@@ -78,6 +78,19 @@ void CLQLSTMLayer::configure(const ICLTensor *input,
                              const ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                              ICLTensor *cell_state_out, ICLTensor *output_state_out,
                              const LSTMParams<ICLTensor> &lstm_params)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
+              recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias,
+              cell_state_in, output_state_in, cell_state_out, output_state_out, lstm_params);
+}
+
+void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                             const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                             const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                             const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                             const ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                             ICLTensor *cell_state_out, ICLTensor *output_state_out,
+                             const LSTMParams<ICLTensor> &lstm_params)
 {
     ARM_COMPUTE_UNUSED(forget_gate_bias);
     ARM_COMPUTE_UNUSED(cell_bias);
@@ -133,36 +146,36 @@ void CLQLSTMLayer::configure(const ICLTensor *input,
         _input_to_input_weights     = lstm_params.input_to_input_weights();
         _recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();
 
-        _input_to_input_reduction.configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
-        _recurrent_to_input_reduction.configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+        _input_to_input_reduction.configure(compile_context, _input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+        _recurrent_to_input_reduction.configure(compile_context, _recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
     }
-    _input_to_forget_reduction.configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
-    _recurrent_to_forget_reduction.configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
-    _input_to_cell_reduction.configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
-    _recurrent_to_cell_reduction.configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
-    _input_to_output_reduction.configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
-    _recurrent_to_output_reduction.configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+    _input_to_forget_reduction.configure(compile_context, input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+    _recurrent_to_forget_reduction.configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+    _input_to_cell_reduction.configure(compile_context, input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+    _recurrent_to_cell_reduction.configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+    _input_to_output_reduction.configure(compile_context, input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+    _recurrent_to_output_reduction.configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
     if(_projection_bias != nullptr)
     {
-        _projection_reduction.configure(_projection_weights, &_projection_reduction_res, GEMMLowpReductionKernelInfo(num_units, false, lstm_params.hidden_state_zero(), true));
-        _projection_bias_add.configure(ArithmeticOperation::ADD, _projection_bias, &_projection_reduction_res, &_projection_eff_bias, ConvertPolicy::SATURATE);
+        _projection_reduction.configure(compile_context, _projection_weights, &_projection_reduction_res, GEMMLowpReductionKernelInfo(num_units, false, lstm_params.hidden_state_zero(), true));
+        _projection_bias_add.configure(compile_context, ArithmeticOperation::ADD, _projection_bias, &_projection_reduction_res, &_projection_eff_bias, ConvertPolicy::SATURATE);
     }
 
     // Pre-transpose weights to be used in GEMM.
-    _transpose_input_to_forget_weights.configure(input_to_forget_weights, &_input_to_forget_weights_transposed);
-    _transpose_input_to_cell_weights.configure(input_to_cell_weights, &_input_to_cell_weights_transposed);
-    _transpose_input_to_output_weights.configure(input_to_output_weights, &_input_to_output_weights_transposed);
-    _transpose_recurrent_to_forget_weights.configure(recurrent_to_forget_weights, &_recurrent_to_forget_weights_transposed);
-    _transpose_recurrent_to_cell_weights.configure(recurrent_to_cell_weights, &_recurrent_to_cell_weights_transposed);
-    _transpose_recurrent_to_output_weights.configure(recurrent_to_output_weights, &_recurrent_to_output_weights_transposed);
+    _transpose_input_to_forget_weights.configure(compile_context, input_to_forget_weights, &_input_to_forget_weights_transposed);
+    _transpose_input_to_cell_weights.configure(compile_context, input_to_cell_weights, &_input_to_cell_weights_transposed);
+    _transpose_input_to_output_weights.configure(compile_context, input_to_output_weights, &_input_to_output_weights_transposed);
+    _transpose_recurrent_to_forget_weights.configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_weights_transposed);
+    _transpose_recurrent_to_cell_weights.configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_weights_transposed);
+    _transpose_recurrent_to_output_weights.configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_weights_transposed);
     if(!_has_cifg)
     {
-        _transpose_input_to_input_weights.configure(lstm_params.input_to_input_weights(), &_input_to_input_weights_transposed);
-        _transpose_recurrent_to_input_weights.configure(lstm_params.recurrent_to_input_weights(), &_recurrent_to_input_weights_transposed);
+        _transpose_input_to_input_weights.configure(compile_context, lstm_params.input_to_input_weights(), &_input_to_input_weights_transposed);
+        _transpose_recurrent_to_input_weights.configure(compile_context, lstm_params.recurrent_to_input_weights(), &_recurrent_to_input_weights_transposed);
     }
     if(_has_projection)
     {
-        _transpose_projection_weights.configure(_projection_weights, &_projection_weights_transposed);
+        _transpose_projection_weights.configure(compile_context, _projection_weights, &_projection_weights_transposed);
     }
 
     GEMMLowpOutputStageInfo gemmlowp_info;
@@ -175,31 +188,33 @@ void CLQLSTMLayer::configure(const ICLTensor *input,
     // Forget gate.
     const TensorInfo forget_gate_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0));
     const float      input_to_forget_scale = input_to_forget_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.forget_intermediate_scale();
-    configure_mm(_mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
+    configure_mm(compile_context, _mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
                  input, &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias,
                  &_mm_input_to_forget_res, &_input_to_forget_outstage_res, input_to_forget_scale,
                  mm_out_info, forget_gate_outstage_info);
 
     const float recurrent_to_forget_scale = recurrent_to_forget_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
-    configure_mm(_mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
+    configure_mm(compile_context, _mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
                  output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
                  &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
                  mm_out_info, forget_gate_outstage_info);
 
-    _accumulate_input_recurrent_forget.configure(ArithmeticOperation::ADD, &_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, ConvertPolicy::SATURATE);
+    _accumulate_input_recurrent_forget.configure(compile_context, ArithmeticOperation::ADD, &_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
+                                                 ConvertPolicy::SATURATE);
     _input_to_forget_outstage_res.allocator()->allocate();
 
     if(_has_peephole)
     {
         _memory_group.manage(&_mul_cell_to_forget_res);
-        _pixelwise_mul_cell_to_forget.configure(cell_state_in, lstm_params.cell_to_forget_weights(), &_mul_cell_to_forget_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+        _pixelwise_mul_cell_to_forget.configure(compile_context, cell_state_in, lstm_params.cell_to_forget_weights(), &_mul_cell_to_forget_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
         _cell_to_forget_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_forget_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0)));
         _memory_group.manage(&_cell_to_forget_outstage_res);
         const float cell_to_forget_scale = std::pow(2, cell_shift) * lstm_params.cell_to_forget_weights()->info()->quantization_info().uniform().scale / lstm_params.forget_intermediate_scale();
         quantization::calculate_quantized_multiplier(cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
-        _cell_to_forget_outstage.configure(&_mul_cell_to_forget_res, nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
+        _cell_to_forget_outstage.configure(compile_context, &_mul_cell_to_forget_res, nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
         _mul_cell_to_forget_res.allocator()->allocate();
-        _accumulate_cell_forget.configure(ArithmeticOperation::ADD, &_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, ConvertPolicy::SATURATE);
+        _accumulate_cell_forget.configure(compile_context, ArithmeticOperation::ADD, &_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
+                                          ConvertPolicy::SATURATE);
         _cell_to_forget_outstage_res.allocator()->allocate();
     }
 
@@ -209,30 +224,31 @@ void CLQLSTMLayer::configure(const ICLTensor *input,
     const TensorInfo forget_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
     _memory_group.manage(&_forget_gate);
     _forget_gate.allocator()->init(forget_gate_info);
-    _forget_gate_sigmoid.configure(&_recurrent_to_forget_outstage_res, &_forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _forget_gate_sigmoid.configure(compile_context, &_recurrent_to_forget_outstage_res, &_forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
     _recurrent_to_forget_outstage_res.allocator()->allocate();
 
     // Modulation gate.
     const TensorInfo cell_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.cell_intermediate_scale(), 0));
     const float      input_to_cell_scale = input_to_cell_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.cell_intermediate_scale();
-    configure_mm(_mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
+    configure_mm(compile_context, _mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
                  input, &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias,
                  &_mm_input_to_cell_res, &_input_to_cell_outstage_res, input_to_cell_scale,
                  mm_out_info, cell_outstage_info);
 
     const float recurrent_to_cell_scale = recurrent_to_cell_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
-    configure_mm(_mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
+    configure_mm(compile_context, _mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
                  output_state_in, &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias,
                  &_mm_recurrent_to_cell_res, &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale,
                  mm_out_info, cell_outstage_info);
 
-    _accumulate_input_recurrent_modulation.configure(ArithmeticOperation::ADD, &_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, ConvertPolicy::SATURATE);
+    _accumulate_input_recurrent_modulation.configure(compile_context, ArithmeticOperation::ADD, &_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res,
+                                                     ConvertPolicy::SATURATE);
     _input_to_cell_outstage_res.allocator()->allocate();
 
     const TensorInfo cell_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
     _memory_group.manage(&_cell_gate);
     _cell_gate.allocator()->init(cell_gate_info);
-    _cell_gate_tanh.configure(&_recurrent_to_cell_outstage_res, &_cell_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
+    _cell_gate_tanh.configure(compile_context, &_recurrent_to_cell_outstage_res, &_cell_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
     _recurrent_to_cell_outstage_res.allocator()->allocate();
 
     // Input gate.
@@ -242,75 +258,77 @@ void CLQLSTMLayer::configure(const ICLTensor *input,
     if(_has_cifg)
     {
         _ones.allocator()->init(*_forget_gate.info());
-        _input_gate_sub.configure(ArithmeticOperation::SUB, &_ones, &_forget_gate, &_input_gate, ConvertPolicy::SATURATE);
+        _input_gate_sub.configure(compile_context, ArithmeticOperation::SUB, &_ones, &_forget_gate, &_input_gate, ConvertPolicy::SATURATE);
         _ones.allocator()->allocate();
     }
     else
     {
         const TensorInfo input_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0));
         const float      input_to_input_scale = _input_to_input_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.input_intermediate_scale();
-        configure_mm(_mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
+        configure_mm(compile_context, _mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
                      input, &_input_to_input_weights_transposed, &_input_to_input_eff_bias,
                      &_mm_input_to_input_res, &_input_to_input_outstage_res, input_to_input_scale,
                      mm_out_info, input_outstage_info);
 
         const float recurrent_to_input_scale = _recurrent_to_input_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.input_intermediate_scale();
-        configure_mm(_mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
+        configure_mm(compile_context, _mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
-                     input, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
+                     output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
                      &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
                      mm_out_info, input_outstage_info);
-        _accumulate_input_recurrent_input.configure(ArithmeticOperation::ADD, &_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
+        _accumulate_input_recurrent_input.configure(compile_context, ArithmeticOperation::ADD, &_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res,
+                                                    ConvertPolicy::SATURATE);
         _input_to_input_outstage_res.allocator()->allocate();
 
         if(_has_peephole)
         {
             _memory_group.manage(&_mul_cell_to_input_res);
-            _pixelwise_mul_cell_to_input.configure(cell_state_in, lstm_params.cell_to_input_weights(), &_mul_cell_to_input_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+            _pixelwise_mul_cell_to_input.configure(compile_context, cell_state_in, lstm_params.cell_to_input_weights(), &_mul_cell_to_input_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
             const float cell_to_input_scale = std::pow(2, cell_shift) * lstm_params.cell_to_input_weights()->info()->quantization_info().uniform().scale / lstm_params.input_intermediate_scale();
             quantization::calculate_quantized_multiplier(cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
             _cell_to_input_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_input_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0)));
             _memory_group.manage(&_cell_to_input_outstage_res);
-            _cell_to_input_outstage.configure(&_mul_cell_to_input_res, nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
+            _cell_to_input_outstage.configure(compile_context, &_mul_cell_to_input_res, nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
             _mul_cell_to_input_res.allocator()->allocate();
-            _accumulate_cell_input.configure(ArithmeticOperation::ADD, &_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
+            _accumulate_cell_input.configure(compile_context, ArithmeticOperation::ADD, &_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
             _cell_to_input_outstage_res.allocator()->allocate();
         }
 
-        _input_gate_tanh.configure(&_recurrent_to_input_outstage_res, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
+        _input_gate_tanh.configure(compile_context, &_recurrent_to_input_outstage_res, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
         _recurrent_to_input_outstage_res.allocator()->allocate();
     }
     // Cell.
     // TODO(COMPMID-3396): Perform multiplication in the quantized domain in CLPixelWiseMultiplicationKernel
-    _pixelwise_mul_forget_cell.configure(&_forget_gate, cell_state_in, &_forget_gate, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _pixelwise_mul_forget_cell.configure(compile_context, &_forget_gate, cell_state_in, &_forget_gate, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
     const float      cell_gate_scale      = _cell_gate.info()->quantization_info().uniform().scale;
     const float      mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
     const TensorInfo mul_input_cell_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(mul_input_cell_scale, 0));
     _memory_group.manage(&_mul_input_cell_res);
     _mul_input_cell_res.allocator()->init(mul_input_cell_info);
-    _pixelwise_mul_input_cell.configure(&_input_gate, &_cell_gate, &_mul_input_cell_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _pixelwise_mul_input_cell.configure(compile_context, &_input_gate, &_cell_gate, &_mul_input_cell_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
     _cell_gate.allocator()->allocate();
-    _add_forget_cell.configure(ArithmeticOperation::ADD, &_forget_gate, &_mul_input_cell_res, cell_state_out, ConvertPolicy::SATURATE);
+    _add_forget_cell.configure(compile_context, ArithmeticOperation::ADD, &_forget_gate, &_mul_input_cell_res, cell_state_out, ConvertPolicy::SATURATE);
     _mul_input_cell_res.allocator()->allocate();
     _forget_gate.allocator()->allocate();
     if(_has_cell_clipping)
     {
-        _cell_clip.configure(cell_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_cell_clip, quantized_cell_clip));
+        _cell_clip.configure(compile_context, cell_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_cell_clip, quantized_cell_clip));
     }
     // Output gate.
     const TensorInfo output_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0));
     const float      input_to_output_scale = input_to_output_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.output_intermediate_scale();
-    configure_mm(_mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
+    configure_mm(compile_context, _mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
                  input, &_input_to_output_weights_transposed, &_input_to_output_eff_bias,
                  &_mm_input_to_output_res, &_input_to_output_outstage_res, input_to_output_scale,
                  mm_out_info, output_outstage_info);
 
     const float recurrent_to_output_scale = recurrent_to_output_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.output_intermediate_scale();
-    configure_mm(_mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
+    configure_mm(compile_context, _mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
                  output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
                  &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
                  mm_out_info, output_outstage_info);
 
-    _accumulate_input_recurrent_output.configure(ArithmeticOperation::ADD, &_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res, ConvertPolicy::SATURATE);
+    _accumulate_input_recurrent_output.configure(compile_context, ArithmeticOperation::ADD, &_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res,
+                                                 ConvertPolicy::SATURATE);
     _input_to_output_outstage_res.allocator()->allocate();
 
     if(_has_peephole)
@@ -320,31 +338,32 @@ void CLQLSTMLayer::configure(const ICLTensor *input,
         // const float cell_to_output_scale = std::pow(2, cell_shift) * lstm_params.cell_to_output_weights()->info()->quantization_info().uniform().scale / lstm_params.output_intermediate_scale();
         // quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
         _memory_group.manage(&_mul_cell_to_output_res);
-        _pixelwise_mul_cell_to_output.configure(cell_state_out, lstm_params.cell_to_output_weights(), &_mul_cell_to_output_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
-        _accumulate_cell_to_output.configure(ArithmeticOperation::ADD, &_recurrent_to_output_outstage_res, &_mul_cell_to_output_res, &_recurrent_to_output_outstage_res, ConvertPolicy::SATURATE);
+        _pixelwise_mul_cell_to_output.configure(compile_context, cell_state_out, lstm_params.cell_to_output_weights(), &_mul_cell_to_output_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+        _accumulate_cell_to_output.configure(compile_context, ArithmeticOperation::ADD, &_recurrent_to_output_outstage_res, &_mul_cell_to_output_res, &_recurrent_to_output_outstage_res,
+                                             ConvertPolicy::SATURATE);
         _mul_cell_to_output_res.allocator()->allocate();
     }
 
     const TensorInfo output_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
     _memory_group.manage(&_output_gate);
     _output_gate.allocator()->init(output_gate_info);
-    _output_gate_sigmoid.configure(&_recurrent_to_output_outstage_res, &_output_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _output_gate_sigmoid.configure(compile_context, &_recurrent_to_output_outstage_res, &_output_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
     _recurrent_to_output_outstage_res.allocator()->allocate();
 
     // Hidden.
-    _hidden_tanh.configure(cell_state_out, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
+    _hidden_tanh.configure(compile_context, cell_state_out, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
     // TODO(COMPMID-3396): Perform multiplication in the quantized domain in CLPixelWiseMultiplicationKernel
     _memory_group.manage(&_hidden_mul_res);
     const TensorInfo hidden_mul_res(_input_gate.info()->tensor_shape(), 1, DataType::S32);
     _hidden_mul_res.allocator()->init(hidden_mul_res);
-    _pixelwise_mul_hidden.configure(&_output_gate, &_input_gate, &_hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _pixelwise_mul_hidden.configure(compile_context, &_output_gate, &_input_gate, &_hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
     _output_gate.allocator()->allocate();
     _input_gate.allocator()->allocate();
     const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
     quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true);
     gemmlowp_info.gemmlowp_offset  = lstm_params.hidden_state_zero();
     gemmlowp_info.output_data_type = output_state_in->info()->data_type();
-    _hidden_outstage.configure(&_hidden_mul_res, nullptr, output_state_out, gemmlowp_info);
+    _hidden_outstage.configure(compile_context, &_hidden_mul_res, nullptr, output_state_out, gemmlowp_info);
     _hidden_mul_res.allocator()->allocate();
 
     // Projection.
@@ -358,12 +377,12 @@ void CLQLSTMLayer::configure(const ICLTensor *input,
         gemmlowp_info.gemmlowp_max_bound               = std::numeric_limits<int8_t>::max();
         gemmlowp_info.output_data_type                 = DataType::QASYMM8_SIGNED;
 
-        configure_mm(_mm_projection, _projection_outstage, gemmlowp_info,
+        configure_mm(compile_context, _mm_projection, _projection_outstage, gemmlowp_info,
                      output_state_out, &_projection_weights_transposed, &_projection_eff_bias,
                      &_mm_projection_res, &_projection_outstage_res, projection_scale,
                      mm_out_info, projection_outstage_info);
 
-        _accumulate_projection.configure(ArithmeticOperation::ADD, &_projection_outstage_res, output_state_out, output_state_out, ConvertPolicy::SATURATE);
+        _accumulate_projection.configure(compile_context, ArithmeticOperation::ADD, &_projection_outstage_res, output_state_out, output_state_out, ConvertPolicy::SATURATE);
         _projection_outstage_res.allocator()->allocate();
 
         int8_t quantized_projection_clip{ 0 };
@@ -374,7 +393,8 @@ void CLQLSTMLayer::configure(const ICLTensor *input,
 
         if(quantized_projection_clip > 0)
         {
-            _projection_clip.configure(output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_projection_clip, quantized_projection_clip));
+            _projection_clip.configure(compile_context, output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_projection_clip,
+                                                                                                       quantized_projection_clip));
             _has_projection_clipping = true;
         }
     }
index cc78ccede3f0ad7610c47117929008e4acd6594c..6239f279ea5d5fb3f5e01e947c82c000dc046378 100644 (file)
 namespace arm_compute
 {
 void CLQuantizationLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLQuantizationLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLQuantizationLayerKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
 
index e839a6ba2115e6a4f63ff4bdc2489335dd4d4fcf..57b8d70089f59046e8e9f72bfc52f8336d4f0777 100644 (file)
@@ -67,6 +67,13 @@ Status CLRNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights
 
 void CLRNNLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state, ICLTensor *output,
                            ActivationLayerInfo &info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, recurrent_weights, bias, hidden_state, output, info);
+}
+
+void CLRNNLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *recurrent_weights, const ICLTensor *bias,
+                           ICLTensor *hidden_state,
+                           ICLTensor *output, ActivationLayerInfo &info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLRNNLayer::validate(input->info(), weights->info(), recurrent_weights->info(), bias->info(), hidden_state->info(), output->info(), info));
@@ -81,23 +88,23 @@ void CLRNNLayer::configure(const ICLTensor *input, const ICLTensor *weights, con
 
     // Manage intermediate buffers and configure
     _memory_group.manage(&_fully_connected_out);
-    _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out);
+    _fully_connected_kernel.configure(compile_context, input, weights, bias, &_fully_connected_out);
 
     _memory_group.manage(&_gemm_output);
-    _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
+    _gemm_state_f.configure(compile_context, hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
 
     _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
     _memory_group.manage(&_add_output);
 
-    _add_kernel.configure(ArithmeticOperation::ADD, &_fully_connected_out, &_gemm_output, &_add_output, ConvertPolicy::SATURATE);
+    _add_kernel.configure(compile_context, ArithmeticOperation::ADD, &_fully_connected_out, &_gemm_output, &_add_output, ConvertPolicy::SATURATE);
 
     _fully_connected_out.allocator()->allocate();
     _gemm_output.allocator()->allocate();
 
-    _activation_kernel.configure(&_add_output, hidden_state, info);
+    _activation_kernel.configure(compile_context, &_add_output, hidden_state, info);
     _add_output.allocator()->allocate();
 
-    _copy_kernel.configure(hidden_state, output);
+    _copy_kernel.configure(compile_context, hidden_state, output);
 }
 
 void CLRNNLayer::run()
index f0044e20a02f901424480fd5e01f8fdd99765985..43b58ddb9b9f10fde0a05c83bc7bdf2ceaded7bb 100644 (file)
@@ -37,10 +37,15 @@ Status CLROIAlignLayer::validate(const ITensorInfo *input, const ITensorInfo *ro
 }
 
 void CLROIAlignLayer::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info);
+}
+
+void CLROIAlignLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
 {
     // Configure ROI pooling kernel
     auto k = arm_compute::support::cpp14::make_unique<CLROIAlignLayerKernel>();
-    k->configure(input, rois, output, pool_info);
+    k->configure(compile_context, input, rois, output, pool_info);
     _kernel = std::move(k);
 }
 
index 6780907155a7d3bf903e71e4cc9725df477e15e2..bb54cfa2ca6d2fa0e1fac10acd95eb4ad1087a93 100644 (file)
 using namespace arm_compute;
 
 void CLROIPoolingLayer::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info);
+}
+
+void CLROIPoolingLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
 {
     // Configure ROI pooling kernel
     auto k = arm_compute::support::cpp14::make_unique<CLROIPoolingLayerKernel>();
-    k->configure(input, rois, output, pool_info);
+    k->configure(compile_context, input, rois, output, pool_info);
     _kernel = std::move(k);
 }
index 5dedcc081cea46a2d5fe1ca6a9efd1e1ec7f9548..b29b03d5b55c1c3458b0b626ff9f6eaf97df1088 100644 (file)
 using namespace arm_compute;
 
 void CLRange::configure(ICLTensor *output, const float start, const float end, const float step)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), output, start, end, step);
+}
+
+void CLRange::configure(const CLCompileContext &compile_context, ICLTensor *output, const float start, const float end, const float step)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLRangeKernel>();
     k->set_target(CLScheduler::get().target());
-    k->configure(output, start, end, step);
+    k->configure(compile_context, output, start, end, step);
     _kernel = std::move(k);
 
     // Tune kernels
index 3b7a7f873aafca5127c3c7031f00b112a8b840c8..3ddaa00d4b4b2a5674bb4295295d60848a2ab1a7 100644 (file)
@@ -92,6 +92,11 @@ CLReduceMean::CLReduceMean(std::shared_ptr<IMemoryManager> memory_manager)
 {
 }
 void CLReduceMean::configure(ICLTensor *input, const Coordinates &reduction_axis, bool keep_dims, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, reduction_axis, keep_dims, output);
+}
+
+void CLReduceMean::configure(const CLCompileContext &compile_context, ICLTensor *input, const Coordinates &reduction_axis, bool keep_dims, ICLTensor *output)
 {
     // Perform validate step
     ARM_COMPUTE_ERROR_THROW_ON(CLReduceMean::validate(input->info(), reduction_axis, keep_dims, output->info()));
@@ -118,13 +123,13 @@ void CLReduceMean::configure(ICLTensor *input, const Coordinates &reduction_axis
 
         if(i == _reduction_ops - 1 && keep_dims)
         {
-            _reduction_kernels[i].configure(in, output, axis_local[i], ReductionOperation::MEAN_SUM);
+            _reduction_kernels[i].configure(compile_context, in, output, axis_local[i], ReductionOperation::MEAN_SUM);
         }
         else
         {
             _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), input->info()->data_type(), input->info()->quantization_info()));
             _memory_group.manage(&_reduced_outs[i]);
-            _reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i], ReductionOperation::MEAN_SUM);
+            _reduction_kernels[i].configure(compile_context, in, &_reduced_outs[i], axis_local[i], ReductionOperation::MEAN_SUM);
         }
     }
 
@@ -147,7 +152,7 @@ void CLReduceMean::configure(ICLTensor *input, const Coordinates &reduction_axis
             out_shape.remove_dimension(axis_local[i] - i);
         }
         auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape));
-        _reshape.configure(&_reduced_outs[_reduction_ops - 1], output);
+        _reshape.configure(compile_context, &_reduced_outs[_reduction_ops - 1], output);
     }
 }
 
index 77168998f81228fddc07fa3c95aeedcb02ce69b0..b659ecfaf6fe321d2d2d33dcefee58be3bd60353 100644 (file)
@@ -190,6 +190,11 @@ ICLTensor *CLReductionOperation::configure_intermediate_result_vector(ICLTensor
 }
 
 void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, op, keep_dims);
+}
+
+void CLReductionOperation::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     _op                  = op;
@@ -218,7 +223,7 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
             _memory_group.manage(&_results_vector.back());
         }
 
-        _reduction_kernels_vector[0].configure(input, output_internal, axis, op, 0);
+        _reduction_kernels_vector[0].configure(compile_context, input, output_internal, axis, op, 0);
     }
     else
     {
@@ -318,15 +323,15 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
                 ARM_COMPUTE_ERROR("Not supported");
         }
 
-        _reduction_kernels_vector[0].configure(input, &_results_vector[0], axis, first_kernel_op);
-        _border_handlers_vector[0].configure(input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, pixelValue);
+        _reduction_kernels_vector[0].configure(compile_context, input, &_results_vector[0], axis, first_kernel_op);
+        _border_handlers_vector[0].configure(compile_context, input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, pixelValue);
 
         // Apply ReductionOperation on intermediate stages
         for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
         {
             _memory_group.manage(&_results_vector[i]);
-            _reduction_kernels_vector[i].configure(&_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
-            _border_handlers_vector[i].configure(&_results_vector[i - 1], _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, pixelValue);
+            _reduction_kernels_vector[i].configure(compile_context, &_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
+            _border_handlers_vector[i].configure(compile_context, &_results_vector[i - 1], _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, pixelValue);
             _results_vector[i - 1].allocator()->allocate();
         }
 
@@ -339,14 +344,14 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
             _memory_group.manage(&_results_vector.back());
         }
 
-        _reduction_kernels_vector[last_stage].configure(&_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width);
-        _border_handlers_vector[last_stage].configure(&_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue);
+        _reduction_kernels_vector[last_stage].configure(compile_context, &_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width);
+        _border_handlers_vector[last_stage].configure(compile_context, &_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue);
         _results_vector[last_stage - 1].allocator()->allocate();
     }
 
     if(_is_reshape_required)
     {
-        _reshape_kernel.configure(&_results_vector.back(), output);
+        _reshape_kernel.configure(compile_context, &_results_vector.back(), output);
         _results_vector.back().allocator()->allocate();
     }
 }
index 9e4529b1dccab7a0ce86fae0e4a0243aed6f0e02..af241ec29982334c75f55bd4ce139c38d1e22ce4 100644 (file)
 using namespace arm_compute;
 
 void CLRemap::configure(ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, map_x, map_y, output, policy, border_mode, constant_border_value);
+}
+
+void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy,
+                        BorderMode border_mode,
+                        uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
@@ -44,7 +51,7 @@ void CLRemap::configure(ICLTensor *input, const ICLTensor *map_x, const ICLTenso
     ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported");
 
     auto k = arm_compute::support::cpp14::make_unique<CLRemapKernel>();
-    k->configure(input, map_x, map_y, output, policy, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, map_x, map_y, output, policy, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
index 547d0fc4839131c0acac659be22fa7bd0aca0557..ea9331414cbfe26deb2d4cf6ea8afe87295a6035 100644 (file)
 using namespace arm_compute;
 
 void CLReorgLayer::configure(ICLTensor *input, ICLTensor *output, int32_t stride)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, stride);
+}
+
+void CLReorgLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t stride)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLReorgLayerKernel>();
-    k->configure(input, output, stride);
+    k->configure(compile_context, input, output, stride);
     _kernel = std::move(k);
 }
 
index 3325d2c2349920b1dfced7acc52cb99958e8011d..13baedb3f96c848f4b903dc758075603616ef79f 100644 (file)
 using namespace arm_compute;
 
 void CLReshapeLayer::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLReshapeLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLReshapeLayerKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
 
index 3f7a2708bbbba259b9e65875c8d29f45a9980b05..3c8bc15a5448691a68372a5b1550e51b6e5aa805 100644 (file)
 namespace arm_compute
 {
 void CLReverse::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, axis);
+}
+
+void CLReverse::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLReverseKernel>();
-    k->configure(input, output, axis);
+    k->configure(compile_context, input, output, axis);
     _kernel = std::move(k);
 }
 
index 9ec8c44836210016264e20cfba696eb7be4a4be3..a9395bdc3da69281e31975882e856acf5a52e408 100644 (file)
@@ -34,11 +34,17 @@ using namespace arm_compute;
 
 void CLScale::configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding,
                         bool align_corners)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, policy, border_mode, constant_border_value, sampling_policy, use_padding, align_corners);
+}
+
+void CLScale::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value,
+                        SamplingPolicy sampling_policy, bool use_padding, bool align_corners)
 {
     ARM_COMPUTE_UNUSED(use_padding);
     auto k = arm_compute::support::cpp14::make_unique<CLScaleKernel>();
     k->set_target(CLScheduler::get().target());
-    k->configure(input, output, policy, border_mode, sampling_policy, align_corners);
+    k->configure(compile_context, input, output, policy, border_mode, sampling_policy, align_corners);
     _kernel = std::move(k);
 
     // Tune kernels
@@ -50,7 +56,7 @@ void CLScale::configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy
     {
         border_mode = BorderMode::CONSTANT;
     }
-    _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value);
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, constant_border_value);
 }
 
 Status CLScale::validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy,
index df96ce83ba36c0a56d6a551fea2c0626a4ae3d27..faad5424a247d6ded94ceb7f45a0240d090282bd 100644 (file)
 using namespace arm_compute;
 
 void CLScharr3x3::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
+}
+
+void CLScharr3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLScharr3x3Kernel>();
-    k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
index 90c368e9b8f093222fb2366579941314b404ba42..7187010448fc944cee2aaa08772f789020da6308 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,9 +32,14 @@ using namespace arm_compute;
 namespace arm_compute
 {
 void CLSelect::configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), c, x, y, output);
+}
+
+void CLSelect::configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLSelectKernel>();
-    k->configure(c, x, y, output);
+    k->configure(compile_context, c, x, y, output);
     _kernel = std::move(k);
 }
 
index d8e8e7e14003a38af8356b02797f49f6b4986fc6..e8cc0f5499007261f369df6de901a6703d6853ac 100644 (file)
 namespace arm_compute
 {
 void CLSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends);
+}
+
+void CLSlice::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input);
 
@@ -39,7 +44,7 @@ void CLSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordin
     const int32_t slice_end_mask = arm_compute::helpers::tensor_transform::construct_slice_end_mask(ends);
 
     auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceKernel>();
-    k->configure(input, output, starts, ends, BiStrides(), 0, slice_end_mask, 0);
+    k->configure(compile_context, input, output, starts, ends, BiStrides(), 0, slice_end_mask, 0);
     _kernel = std::move(k);
 }
 
index e362ebf8ba4cac3ee86a31307899397fe60a2ca7..c3604f970f81f60676973c7030be8578ce00c4ac 100644 (file)
 using namespace arm_compute;
 
 void CLSobel3x3::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
+}
+
+void CLSobel3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLSobel3x3Kernel>();
-    k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+    k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
index 22fbef17eb0aa5d39fafef2345d9b765e5bbf09f..f8a33f3fb68846e6a35c9274005f72317e849786 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,6 +39,11 @@ CLSobel5x5::CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager)
 }
 
 void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
+}
+
+void CLSobel5x5::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
 
@@ -53,8 +58,8 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
         _tmp_y.allocator()->init(tensor_info);
         _memory_group.manage(&_tmp_x);
         _memory_group.manage(&_tmp_y);
-        _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
-        _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+        _sobel_hor.configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+        _sobel_vert.configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
         _tmp_x.allocator()->allocate();
         _tmp_y.allocator()->allocate();
     }
@@ -62,19 +67,19 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
     {
         _tmp_x.allocator()->init(tensor_info);
         _memory_group.manage(&_tmp_x);
-        _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
-        _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+        _sobel_hor.configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+        _sobel_vert.configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
         _tmp_x.allocator()->allocate();
     }
     else if(run_sobel_y)
     {
         _tmp_y.allocator()->init(tensor_info);
         _memory_group.manage(&_tmp_y);
-        _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
-        _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+        _sobel_hor.configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+        _sobel_vert.configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
         _tmp_y.allocator()->allocate();
     }
-    _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
 }
 
 void CLSobel5x5::run()
index 9b38f6928fa98d76faa248b2313b276c38e3f90e..6d3c7f0d08e709c3a6274a96abeaa367b86596f8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,6 +39,11 @@ CLSobel7x7::CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager)
 }
 
 void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
+}
+
+void CLSobel7x7::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
 
@@ -53,8 +58,8 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
         _tmp_y.allocator()->init(tensor_info);
         _memory_group.manage(&_tmp_x);
         _memory_group.manage(&_tmp_y);
-        _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
-        _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+        _sobel_hor.configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+        _sobel_vert.configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
         _tmp_x.allocator()->allocate();
         _tmp_y.allocator()->allocate();
     }
@@ -62,19 +67,19 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
     {
         _tmp_x.allocator()->init(tensor_info);
         _memory_group.manage(&_tmp_x);
-        _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
-        _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+        _sobel_hor.configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+        _sobel_vert.configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
         _tmp_x.allocator()->allocate();
     }
     else if(run_sobel_y)
     {
         _tmp_y.allocator()->init(tensor_info);
         _memory_group.manage(&_tmp_y);
-        _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
-        _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+        _sobel_hor.configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+        _sobel_vert.configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
         _tmp_y.allocator()->allocate();
     }
-    _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
 }
 
 void CLSobel7x7::run()
index e01d2c75cadc906af2942088ebc84eb082ed2f7a..b0b2117cd99d8c3c8fd3fc221c2c3310a2a29999 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,12 @@ CLSoftmaxLayerGeneric<IS_LOG>::CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryMana
 
 template <bool IS_LOG>
 void CLSoftmaxLayerGeneric<IS_LOG>::configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t axis)
+{
+    configure_reshape_input_kernel(CLKernelLibrary::get().get_compile_context(), input, output, axis);
+}
+
+template <bool IS_LOG>
+void CLSoftmaxLayerGeneric<IS_LOG>::configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis)
 {
     // Flatten the input
     const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input->info(), axis);
@@ -56,13 +62,13 @@ void CLSoftmaxLayerGeneric<IS_LOG>::configure_reshape_input_kernel(const ICLTens
     if(axis != 3)
     {
         auto reshape_kernel_ptr = support::cpp14::make_unique<CLReshapeLayerKernel>();
-        reshape_kernel_ptr->configure(input, &_input_flattened);
+        reshape_kernel_ptr->configure(compile_context, input, &_input_flattened);
         _flatten_kernel_ptr = std::move(reshape_kernel_ptr);
     }
     else
     {
         auto flatten_kernel_ptr = support::cpp14::make_unique<CLFlattenLayerKernel>();
-        flatten_kernel_ptr->configure(input, &_input_flattened);
+        flatten_kernel_ptr->configure(compile_context, input, &_input_flattened);
         _flatten_kernel_ptr = std::move(flatten_kernel_ptr);
     }
 
@@ -73,6 +79,12 @@ void CLSoftmaxLayerGeneric<IS_LOG>::configure_reshape_input_kernel(const ICLTens
 
 template <bool IS_LOG>
 void CLSoftmaxLayerGeneric<IS_LOG>::configure(const ICLTensor *input, ICLTensor *output, float beta, size_t axis)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, beta, axis);
+}
+
+template <bool IS_LOG>
+void CLSoftmaxLayerGeneric<IS_LOG>::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta, size_t axis)
 {
     // Perform validation step
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -123,7 +135,7 @@ void CLSoftmaxLayerGeneric<IS_LOG>::configure(const ICLTensor *input, ICLTensor
     softmax_info.input_data_type = input_2D->info()->data_type();
 
     // Configure kernels
-    _max_shift_exp_sum_kernel.configure(input_2D, &_max, &_tmp, &_sum, softmax_info);
+    _max_shift_exp_sum_kernel.configure(compile_context, input_2D, &_max, &_tmp, &_sum, softmax_info);
 
     if(_needs_flattening)
     {
@@ -131,10 +143,10 @@ void CLSoftmaxLayerGeneric<IS_LOG>::configure(const ICLTensor *input, ICLTensor
         _memory_group.manage(&_output_flattened);
 
         // The normalization kernel stores the result in a flat output tensor
-        _norm_kernel.configure(&_tmp, &_sum, &_output_flattened, softmax_info);
+        _norm_kernel.configure(compile_context, &_tmp, &_sum, &_output_flattened, softmax_info);
 
         // Reshape the flat output into a the requested (4D) output
-        _reshape_kernel.configure(&_output_flattened, output);
+        _reshape_kernel.configure(compile_context, &_output_flattened, output);
 
         // Allocate the intermediate flat tensors
         _input_flattened.allocator()->allocate();
@@ -143,7 +155,7 @@ void CLSoftmaxLayerGeneric<IS_LOG>::configure(const ICLTensor *input, ICLTensor
     else
     {
         // Softmax 2D case
-        _norm_kernel.configure(&_tmp, &_sum, output, softmax_info);
+        _norm_kernel.configure(compile_context, &_tmp, &_sum, output, softmax_info);
     }
 
     // Allocate intermediate buffers
@@ -203,7 +215,7 @@ Status CLSoftmaxLayerGeneric<IS_LOG>::validate(const ITensorInfo *input, const I
 }
 
 template <bool IS_LOG>
-void CLSoftmaxLayerGeneric<IS_LOG>::run()
+void           CLSoftmaxLayerGeneric<IS_LOG>::run()
 {
     MemoryGroupResourceScope scope_mg(_memory_group);
 
index fa6e82efb027b89fd3b375537aed84c1eaf58141..021d31649d4dd74c16530aa0da927f1c481caf86 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,27 +38,38 @@ CLSpaceToBatchLayer::CLSpaceToBatchLayer()
 }
 
 void CLSpaceToBatchLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, paddings, output);
+}
+
+void CLSpaceToBatchLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output);
 
     if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
     {
         _has_padding = true;
-        _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+        _memset_kernel.configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
     }
-    _space_to_batch_kernel.configure(input, block_shape, paddings, output);
+    _space_to_batch_kernel.configure(compile_context, input, block_shape, paddings, output);
 }
 
 void CLSpaceToBatchLayer::configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, padding_left, padding_right, output);
+}
+
+void CLSpaceToBatchLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left,
+                                    const Size2D &padding_right, ICLTensor *output)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
     {
         _has_padding = true;
-        _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+        _memset_kernel.configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
     }
-    _space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output);
+    _space_to_batch_kernel.configure(compile_context, input, block_shape_x, block_shape_y, padding_left, padding_right, output);
 }
 
 Status CLSpaceToBatchLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output)
index f02a13f66de5d3f497beceedbf15baf198dd7686..a4ffefc18907475026f668f5072435d1a88b38b1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,7 +39,12 @@ CLSpaceToDepthLayer::CLSpaceToDepthLayer()
 
 void CLSpaceToDepthLayer::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape)
 {
-    _space_to_depth_kernel.configure(input, output, block_shape);
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape);
+}
+
+void CLSpaceToDepthLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
+{
+    _space_to_depth_kernel.configure(compile_context, input, output, block_shape);
 }
 
 Status CLSpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
index 607445af4a9c4f923588e02c1a717a17fab87b18..79c3fe53713cca73c6f218f8396f147456f9ac1b 100644 (file)
@@ -43,6 +43,11 @@ CLStackLayer::CLStackLayer() // NOLINT
 }
 
 void CLStackLayer::configure(const std::vector<ICLTensor *> &input, int axis, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, axis, output);
+}
+
+void CLStackLayer::configure(const CLCompileContext &compile_context, const std::vector<ICLTensor *> &input, int axis, ICLTensor *output)
 {
     _num_inputs = input.size();
     _stack_kernels.resize(_num_inputs);
@@ -52,7 +57,7 @@ void CLStackLayer::configure(const std::vector<ICLTensor *> &input, int axis, IC
 
     for(unsigned int i = 0; i < _num_inputs; i++)
     {
-        _stack_kernels[i].configure(input[i], axis_u, i, _num_inputs, output);
+        _stack_kernels[i].configure(compile_context, input[i], axis_u, i, _num_inputs, output);
     }
 }
 
index e217906870f36e031f746d8e5a27ce75ef1211a8..454759664cd1f1bf2c28daeddc07ed7f9e104bbb 100644 (file)
@@ -32,9 +32,16 @@ namespace arm_compute
 void CLStridedSlice::configure(const ICLTensor *input, ICLTensor *output,
                                const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
                                int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
+}
+
+void CLStridedSlice::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+                               const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+                               int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceKernel>();
-    k->configure(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
+    k->configure(compile_context, input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
     _kernel = std::move(k);
 }
 
index 35f939f873b8dc5fd1c3ead48630464add81a0b5..47e15d3c123a626baef80d2ed7d17e7ef3bc49f5 100644 (file)
 using namespace arm_compute;
 
 void CLTableLookup::configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, lut, output);
+}
+
+void CLTableLookup::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLTableLookupKernel>();
-    k->configure(input, lut, output);
+    k->configure(compile_context, input, lut, output);
     _kernel = std::move(k);
 }
index a655783498bc41d6e2a17525e1cda25836c92605..57c92724fa416a7b1811d9fd2192feae68002910 100644 (file)
 using namespace arm_compute;
 
 void CLThreshold::configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, false_value, true_value, type, upper);
+}
+
+void CLThreshold::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type,
+                            uint8_t upper)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLThresholdKernel>();
-    k->configure(input, output, threshold, false_value, true_value, type, upper);
+    k->configure(compile_context, input, output, threshold, false_value, true_value, type, upper);
     _kernel = std::move(k);
 }
index e45d88d867f7bef594705f5cc6250bebb874164b..178d7af95ed8a399eaa404d85025a2200d584fac 100644 (file)
 namespace arm_compute
 {
 void CLTile::configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, multiples);
+}
+
+void CLTile::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLTileKernel>();
-    k->configure(input, output, multiples);
+    k->configure(compile_context, input, output, multiples);
     _kernel = std::move(k);
 }
 
index aa912303db43793d116649205d2955a373184eb8..f5121d06a57e27d39b783d22c8112583d2aa1f23 100644 (file)
 using namespace arm_compute;
 
 void CLTranspose::configure(const ICLTensor *input, ICLTensor *output)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLTranspose::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
-    k->configure(input, output);
+    k->configure(compile_context, input, output);
     _kernel = std::move(k);
 }
 
index eb1dd8cd4443d4afbbdd49b656cd256291784b02..032fb993d01473f17d798f7a41e50287649c920b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,6 +60,11 @@ CLUnstack::CLUnstack() // NOLINT
 }
 
 void CLUnstack::configure(const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output_vector, axis);
+}
+
+void CLUnstack::configure(const CLCompileContext &compile_context, const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis)
 {
     std::vector<ITensorInfo *> outputs_vector_info(output_vector.size());
     std::transform(output_vector.begin(), output_vector.end(), outputs_vector_info.begin(), [](ICLTensor * t)
@@ -83,7 +88,7 @@ void CLUnstack::configure(const ICLTensor *input, const std::vector<ICLTensor *>
     {
         // Adjusts start and end coordinates to take a 2D slice at a time
         slice_start.set(axis_u, slice);
-        _strided_slice_vector[slice].configure(input, output_vector[slice], slice_start, Coordinates(), BiStrides(), 0, slice_end_mask, (1 << axis_u));
+        _strided_slice_vector[slice].configure(compile_context, input, output_vector[slice], slice_start, Coordinates(), BiStrides(), 0, slice_end_mask, (1 << axis_u));
     }
 }
 
index 1dad3250a29ace023ddce42652dddfe353c16c62..dd04686d6070c85529ba974c52930c701db1ac60 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,11 +43,17 @@ Status CLUpsampleLayer::validate(const ITensorInfo *input, const ITensorInfo *ou
 
 void CLUpsampleLayer::configure(ICLTensor *input, ICLTensor *output,
                                 const Size2D &info, const InterpolationPolicy upsampling_policy)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, info, upsampling_policy);
+}
+
+void CLUpsampleLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output,
+                                const Size2D &info, const InterpolationPolicy upsampling_policy)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     _output = output;
-    _upsample.configure(input, _output, info, upsampling_policy);
+    _upsample.configure(compile_context, input, _output, info, upsampling_policy);
 }
 
 void CLUpsampleLayer::run()
index 08c22dba2b70b5c7b24cc67220ff3964c8e0d8e2..ce2171b3d4f2dcb2b065a985196c026666efdd38 100644 (file)
 using namespace arm_compute;
 
 void CLWarpAffine::configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy, border_mode, constant_border_value);
+}
+
+void CLWarpAffine::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
+                             uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLWarpAffineKernel>();
-    k->configure(input, output, matrix, policy);
+    k->configure(compile_context, input, output, matrix, policy);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
index b5bc4faba32c621629fafdf962aca41b496a4192..06c06616d0c4866c236b7605e32a704f5d0f11e0 100644 (file)
 using namespace arm_compute;
 
 void CLWarpPerspective::configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy, border_mode, constant_border_value);
+}
+
+void CLWarpPerspective::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
+                                  uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLWarpPerspectiveKernel>();
-    k->configure(input, output, matrix, policy);
+    k->configure(compile_context, input, output, matrix, policy);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
index a5db9773717adce7d018952307761d40418b5aab..132c3ee926c52b78fba59b6455ea8d52a819bde8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -97,6 +97,13 @@ CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryMa
 
 void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info,
                                            bool enable_fast_math)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, act_info, enable_fast_math);
+}
+
+void CLWinogradConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                                           const PadStrideInfo &conv_info,
+                                           const ActivationLayerInfo &act_info, bool enable_fast_math)
 {
     // Get indices for the width and height
     const size_t idx_width  = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
@@ -129,17 +136,18 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
     // Do not manage _input1 as it contains the weights
 
     // Configure input transform
-    _input_transform.configure(input, &_input0, winograd_info);
+    _input_transform.configure(compile_context, input, &_input0, winograd_info);
 
     // Configure filter transform
-    _filter_transform.configure(weights, &_input1, winograd_info);
+    _filter_transform.configure(compile_context, weights, &_input1, winograd_info);
 
     // Configure batched matrix multiply
-    _batched_mm.configure(&_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, false, false, GEMMLowpOutputStageInfo(),
-                                                                                                 (input->info()->data_type() == DataType::F16)));
+    _batched_mm.configure(compile_context, &_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, false, false,
+                                                                                                                  GEMMLowpOutputStageInfo(),
+                                                                                                                  (input->info()->data_type() == DataType::F16)));
 
     // Configure output transform
-    _output_transform.configure(&_batched_mm_output, biases, output, winograd_info, act_info);
+    _output_transform.configure(compile_context, &_batched_mm_output, biases, output, winograd_info, act_info);
 
     // Allocate temporary tensors
     _input0.allocator()->allocate();
index 55eccf4765763064431af998d38733bfa216fb08..ae400768fe24884ee100eae30aba961af61a517d 100644 (file)
 using namespace arm_compute;
 
 void CLWinogradInputTransform::configure(ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, winograd_info);
+}
+
+void CLWinogradInputTransform::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLWinogradInputTransformKernel>();
-    k->configure(input, output, winograd_info);
+    k->configure(compile_context, input, output, winograd_info);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
+    _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
 }
 
 Status CLWinogradInputTransform::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
index b2b84d9dc173cb0daedc8138171fbd4d9527ad49..0c0c1065bcb665571758768ed12a76d906af6965 100644 (file)
 using namespace arm_compute;
 
 void CLYOLOLayer::configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info, num_classes);
+}
+
+void CLYOLOLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLYOLOLayerKernel>();
-    k->configure(input, output, act_info, num_classes);
+    k->configure(compile_context, input, output, act_info, num_classes);
     _kernel = std::move(k);
 }
 
index d83c4113f5f277ddb535648c910d1036fb84da28..450c68e0e91aa8d6a1eb9f255fe8abcf346e210d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -79,30 +79,27 @@ protected:
         TensorType boxes_ind = create_tensor<TensorType>(TensorShape(boxes_shape[1]), DataType::S32);
         TensorType dst       = create_tensor<TensorType>(dst_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
 
+        boxes.allocator()->allocate();
+        boxes_ind.allocator()->allocate();
+        fill(AccessorType(boxes), 1, is_outside_bounds ? 0.0f - out_of_bounds_reach : 0.0f, is_outside_bounds ? 1.0f + out_of_bounds_reach : 1.0f);
+        fill(AccessorType(boxes_ind), 2, 0, static_cast<int32_t>(src_shape[3] - 1));
+
         // Create and configure function
         FunctionType crop;
         crop.configure(&src, &boxes, &boxes_ind, &dst, crop_size, method, extrapolation_value);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(boxes.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(boxes_ind.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Allocate tensors
         src.allocator()->allocate();
-        boxes.allocator()->allocate();
-        boxes_ind.allocator()->allocate();
         dst.allocator()->allocate();
 
         ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!boxes.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!boxes_ind.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Fill tensors
         fill(AccessorType(src), 0);
-        fill(AccessorType(boxes), 1, is_outside_bounds ? 0.0f - out_of_bounds_reach : 0.0f, is_outside_bounds ? 1.0f + out_of_bounds_reach : 1.0f);
-        fill(AccessorType(boxes_ind), 2, 0, static_cast<int32_t>(src_shape[3] - 1));
 
         // Compute function
         crop.run();