Binaries available at https://github.com/ARM-software/ComputeLibrary/releases.
-License & Contributions: The software is provided under MIT license. Contributions to this project are accepted under the same license.
+### Supported Architectures/Technologies
+
+- Arm® CPUs:
+    - Arm® Cortex®-A processor family using Arm® Neon™ technology
+    - Arm® Cortex®-R processor family with Armv8-R AArch64 architecture using Arm® Neon™ technology
+    - Arm® Cortex®-X1 processor using Arm® Neon™ technology
+
+- Arm® Mali™ GPUs:
+    - Arm® Mali™-G processor family
+    - Arm® Mali™-T processor family
+
+- x86
+
+### Supported OS
+
+- Android™
+- Bare Metal
+- Linux®
+- macOS®
+- Tizen™
+
+## License and Contributions
+
+The software is provided under MIT license. Contributions to this project are accepted under the same license.
### Public mailing list
For technical discussion, the ComputeLibrary project has a public mailing list: acl-dev@lists.linaro.org
```Signed-off-by: John Doe <john.doe@example.org>```
You must use your real name; no pseudonyms or anonymous contributions are accepted.
+
+## Trademarks and Copyrights
+
+Android is a trademark of Google LLC.
+
+Arm, Cortex, Mali and Neon are registered trademarks or trademarks of Arm Limited (or its subsidiaries) in the US and/or elsewhere.
+
+Linux® is the registered trademark of Linus Torvalds in the U.S. and other countries.
+
+Mac and macOS are trademarks of Apple Inc., registered in the U.S. and other
+countries.
+
+Tizen is a registered trademark of The Linux Foundation.
Return()
if env['neon'] and 'x86' in env['arch']:
- print("Cannot compile NEON for x86")
+ print("Cannot compile Neon for x86")
Exit(1)
if env['set_soname'] and not version_at_least(SCons.__version__, "2.4"):
print("GCC 6.2.1 or newer is required to compile armv8.2-a code")
Exit(1)
elif env['arch'] == 'arm64-v8a' and not version_at_least(compiler_ver, '4.9'):
- print("GCC 4.9 or newer is required to compile NEON code for AArch64")
+ print("GCC 4.9 or newer is required to compile Neon code for AArch64")
Exit(1)
if version_at_least(compiler_ver, '6.1'):
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class Coordinates;
-/** Interface for NEON tensor */
+/** Interface for Neon tensor */
class ITensor
{
public:
os << "UNSPECIFIED";
break;
case Target::NEON:
- os << "NEON";
+ os << "Neon";
break;
case Target::CL:
os << "CL";
enum class Target
{
UNSPECIFIED, /**< Unspecified Target */
- NEON, /**< NEON capable target device */
+ NEON, /**< Neon capable target device */
CL, /**< OpenCL capable target device */
GC, /**< GLES compute capable target device */
};
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/** Returns default target for execution
*
* @note If an OpenCL backend exists then OpenCL is returned,
- * else if the NEON backend exists returns NEON as target.
+ * else, if the Neon backend exists, Neon is returned as the target.
* If no backends are registered an error is raised.
*
* @return Default target
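To make the selection rule concrete, here is a minimal illustrative sketch; `Target` and `IDeviceBackend` below are stand-ins for the graph types, not the library's actual implementation:

```cpp
// Assumed stand-in types, for illustration only.
#include <map>
#include <stdexcept>

enum class Target { NEON, CL };
struct IDeviceBackend; // opaque backend handle for the sketch

Target default_target(const std::map<Target, IDeviceBackend *> &backends)
{
    if(backends.find(Target::CL) != backends.end())
        return Target::CL; // an OpenCL backend exists: return OpenCL
    if(backends.find(Target::NEON) != backends.end())
        return Target::NEON; // otherwise fall back to the Neon backend
    throw std::runtime_error("No backends registered");
}
```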
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
namespace backends
{
-/** NEON device backend */
+/** Neon device backend */
class NEDeviceBackend final : public IDeviceBackend
{
public:
std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override;
private:
- Allocator _allocator; /**< NEON backend allocator */
+ Allocator _allocator; /**< Neon backend allocator */
};
} // namespace backends
} // namespace graph
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace backends
{
-/** Factory for generating NEON backend functions **/
+/** Factory for generating Neon backend functions **/
class NEFunctionFactory final
{
public:
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
namespace backends
{
-/** NEON Sub-Tensor handle interface object **/
+/** Neon Sub-Tensor handle interface object **/
class NESubTensorHandle final : public ITensorHandle
{
public:
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
namespace backends
{
-/** NEON Tensor handle interface object **/
+/** Neon Tensor handle interface object **/
class NETensorHandle final : public ITensorHandle
{
public:
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
*
* ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
*
- * This function calls the following NEON kernels:
+ * This function calls the following CL kernels:
*
* -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
*
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ICLTensor;
using ICLImage = ICLTensor;
-/** Basic function to execute harris corners detection. This function calls the following CL and NEON kernels and functions:
+/** Basic function to execute harris corners detection. This function calls the following CL and Neon kernels and functions:
*
* @note Requires CPU support for the kernels: CPPCornerCandidatesKernel and CPPSortEuclideanDistanceKernel.
*
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
virtual ~IFunction() = default;
/** Run the kernels contained in the function
*
- * For NEON kernels:
+ * For Neon kernels:
* - Multi-threading is used for the kernels which are parallelisable.
* - By default std::thread::hardware_concurrency() threads are used.
*
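As a usage sketch of the threading behaviour described above, the snippet below caps the worker count through the scheduler before calling run(). It follows the public runtime API; the tensor shapes and the activation function are arbitrary choices for illustration:

```cpp
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // By default std::thread::hardware_concurrency() threads are used;
    // override that before running any parallelisable kernel.
    NEScheduler::get().set_num_threads(2);

    Tensor src{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));

    NEActivationLayer act{};
    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    src.allocator()->allocate();
    dst.allocator()->allocate();

    act.run(); // parallelisable kernels are split across the configured threads
    return 0;
}
```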
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* to generate a unique id. We use the following conversion using an unsigned 32bit value:
*
* Lower two bits store the target:
- * 00 -> NEON
+ * 00 -> Neon
* 01 -> CL
* 10 -> GLES
* 11 -> Unused
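A hypothetical round-trip illustration of this id layout (the helper names are invented for the sketch; only the bit assignment comes from the comment above):

```cpp
#include <cstdint>

// Lower two bits carry the target, exactly as listed above.
enum class Target : uint32_t { NEON = 0, CL = 1, GLES = 2 };

constexpr uint32_t make_unique_id(Target target, uint32_t counter)
{
    return (counter << 2) | static_cast<uint32_t>(target); // bits [1:0] = target
}

constexpr Target target_of(uint32_t id)
{
    return static_cast<Target>(id & 0x3u); // mask the lower two bits back out
}

static_assert(target_of(make_unique_id(Target::CL, 7u)) == Target::CL, "round trip");
```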
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
using INEKernel = ICPPKernel;
namespace experimental
{
-/** Basic interface for functions which have a single async NEON kernel */
+/** Basic interface for functions which have a single async Neon kernel */
class INEOperator : public IOperator
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ICPPKernel;
class NEFillBorderKernel;
using INEKernel = ICPPKernel;
-/** Basic interface for functions which have a single NEON kernel */
+/** Basic interface for functions which have a single Neon kernel */
class INESimpleFunction : public IFunction
{
public:
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ICPPKernel;
using INEKernel = ICPPKernel;
-/** Basic interface for functions which have a single NEON kernel and no border */
+/** Basic interface for functions which have a single Neon kernel and no border */
class INESimpleFunctionNoBorder : public IFunction
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEFUNCTIONS_H
#define ARM_COMPUTE_NEFUNCTIONS_H
-/* Header regrouping all the NEON functions */
+/* Header regrouping all the Neon functions */
#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
-/** NEON Scheduler */
+/** Neon Scheduler */
using NEScheduler = Scheduler;
}
#endif /*ARM_COMPUTE_NESCHEDULER_H */
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/** Function to calculate the index of the minimum or maximum values in a
* tensor based on an axis.
*
- * This function calls the following NEON kernels:
+ * This function calls the following Neon kernels:
*
* -# @ref NEReductionOperationKernel
* -# @ref NEFillBorderKernel
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute box filter 3x3. This function calls the following NEON kernels:
+/** Basic function to execute box filter 3x3. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEBox3x3Kernel
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEEdgeNonMaxSuppressionKernel;
class NEEdgeTraceKernel;
-/** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions:
+/** Basic function to execute canny edge on Neon. This function calls the following Neon kernels and functions:
*
* -# @ref NEFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT)
* -# @ref NESobel3x3 (if gradient_size == 3) or
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
template <unsigned int matrix_size>
class NESeparableConvolutionVertKernel;
-/** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels:
+/** Basic function to execute convolution of size 3x3. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEConvolution3x3Kernel
void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
};
-/** Basic function to execute convolution of size 5x5, 7x7, 9x9. This function calls the following NEON kernels:
+/** Basic function to execute convolution of size 5x5, 7x7, 9x9. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEConvolutionKernel or<br/>
/** Basic function to run 9x9 convolution. */
using NEConvolution9x9 = NEConvolutionSquare<9>;
-/** Basic function to execute non-square convolution. This function calls the following NEON kernels:
+/** Basic function to execute non-square convolution. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEConvolutionRectangleKernel or<br/>
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
// Forward declarations
class ITensor;
-/** Basic function to simulate a convolution layer. This function calls one of the following NEON functions:
+/** Basic function to simulate a convolution layer. This function calls one of the following Neon functions:
* -# @ref NEGEMMConvolutionLayer (executed only in case GEMM is required for the operation)
* -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation)
* -# @ref NEDirectConvolutionLayer (executed only in case Direct Convolution is required for the operation)
std::unique_ptr<IFunction> _function; /**< Function to run */
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECONVOLUTIONLAYER_H */
\ No newline at end of file
+#endif /* ARM_COMPUTE_NECONVOLUTIONLAYER_H */
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the
* reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
*
- * This function calls the following NEON kernels/functions:
+ * This function calls the following Neon kernels/functions:
*
* -# @ref CPPUpsample
* -# @ref NEConvolutionLayer
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
- /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
+ /** Basic function to execute optimized depthwise convolution routines. This function calls the following Neon kernels:
*
* @note At the moment 3x3 and 5x5 convolutions with stride 1 or 2 are supported
*
bool _is_prepared;
};
- /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
+ /** Basic function to execute a generic depthwise convolution. This function calls the following Neon kernel:
*
* -# @ref NEDepthwiseConvolutionLayerNativeKernel
*
NEDepthwiseConvolutionLayerGeneric _func_generic;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
\ No newline at end of file
+#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEDerivativeKernel;
class NEFillBorderKernel;
-/** Basic function to execute first order derivative operator. This function calls the following NEON kernels:
+/** Basic function to execute first order derivative operator. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEDerivativeKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute dilate. This function calls the following NEON kernels:
+/** Basic function to execute dilate. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEDilateKernel
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/** Function to run the direct convolution.
*
- * This function calls the following NEON kernels:
+ * This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel for the input
* -# @ref NEDirectConvolutionLayerOutputStageKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NETableLookupKernel;
using IImage = ITensor;
-/** Basic function to execute histogram equalization. This function calls the following NEON kernels:
+/** Basic function to execute histogram equalization. This function calls the following Neon kernels:
*
* -# @ref NEHistogramKernel
* -# @ref NECumulativeDistributionKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute erode. This function calls the following NEON kernels:
+/** Basic function to execute erode. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEErodeKernel
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEFFTRadixStageKernel;
class NEFFTScaleKernel;
-/** Basic function to execute one dimensional FFT. This function calls the following NEON kernels:
+/** Basic function to execute one dimensional FFT. This function calls the following Neon kernels:
*
* -# @ref NEFFTDigitReverseKernel Performs digit reverse
* -# @ref NEFFTRadixStageKernel A list of FFT kernels depending on the radix decomposition
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
// Forward declaration
class ITensor;
-/** Basic function to execute two dimensional FFT. This function calls the following NEON kernels:
+/** Basic function to execute two dimensional FFT. This function calls the following Neon kernels:
*
* -# @ref NEFFT1D 1D FFT is performed on the first given axis
* -# @ref NEFFT1D 1D FFT is performed on the second given axis
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
// Forward declarations
class ITensor;
-/** Basic function to execute FFT-based convolution on NEON. This function calls the following NEON functions/kernels:
+/** Basic function to execute FFT-based convolution on Neon. This function calls the following Neon functions/kernels:
*
* -# @ref NEPermute Permute input if NHWC (only NCHW is supported).
* -# @ref NEPadLayer Pad input.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for NEON backend.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for Neon backend.
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for NEON backend.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for Neon backend.
*
* @return a status
*/
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEFillArrayKernel;
using IImage = ITensor;
-/** Basic function to execute fast corners. This function call the following NEON kernels:
+/** Basic function to execute fast corners. This function calls the following Neon kernels:
*
* -# @ref NEFastCornersKernel
* -# @ref NENonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true)
namespace arm_compute
{
-/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels:
+/** Basic function to reshape the weights of Fully Connected layer with Neon. This function calls the following kernels:
*
* @note The fully connected layer accepts "weights" tensors with only 2 dimensions.
*/
};
} // namespace weights_transformations
-/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
+/** Basic function to compute a Fully Connected layer on Neon. This function calls the following Neon kernels:
* -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
* -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
* -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
class NEGEMMTranspose1xWKernel;
class NEGEMMAssemblyDispatch;
-/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels:
+/** Basic function to execute GEMM on Neon. This function calls the following Neon kernels:
*
* If optimized assembly is available:
* -# @ref NEGEMMAssemblyDispatch
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
class NEGEMMAssemblyDispatch;
-/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
+/** Basic function to compute the convolution layer. This function calls the following Neon kernels/functions:
*
* Supports only NHWC data layout
*
};
} // namespace weights_transformations
-/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
+/** Basic function to compute the convolution layer. This function calls the following Neon kernels/functions:
*
* -# @ref NEIm2ColKernel
* -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32)
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEGEMMTranspose1xWKernel;
class NEGEMMAssemblyDispatch;
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available:
+/** Basic function to execute GEMMLowpMatrixMultiplyCore on Neon. This function calls the following Neon kernels if the DOT product instruction is not available:
*
* -# @ref NEGEMMInterleave4x4Kernel
* -# @ref NEGEMMTranspose1xWKernel
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-/** This file contains all available output stages for GEMMLowp on NEON.
+/** This file contains all available output stages for GEMMLowp on Neon.
*
* In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyCore),
* and processes it to obtain the final ASYMM8 value.
class ITensor;
class ITensorInfo;
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON.
+/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on Neon.
*
* NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters:
*
*
* ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
*
- * This function calls the following NEON kernels:
+ * This function calls the following Neon kernels:
*
* -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
*
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on NEON.
+/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on Neon.
*
* NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:
*
*
* ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
*
- * This function calls the following NEON kernels:
+ * This function calls the following Neon kernels:
*
* -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
*
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on NEON.
+/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on Neon.
*
* NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:
*
*
* ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
*
- * This function calls the following NEON kernels:
+ * This function calls the following Neon kernels:
*
* -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
*
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
-/** Basic function to execute GEMMLowpQuantizeDown kernels on NEON.
+/** Basic function to execute GEMMLowpQuantizeDown kernels on Neon.
*
- * This function calls the following NEON kernels:
+ * This function calls the following Neon kernels:
*
* -# @ref NEGEMMLowpQuantizeDownInt32ScaleKernel
* -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
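To ground the formula repeated in these interfaces, here is a scalar sketch of the quantize-down computation in the gemmlowp fixed-point style. The helper names are illustrative stand-ins for the library's kernels, and saturation corner cases are simplified:

```cpp
#include <algorithm>
#include <cstdint>

// FixedPointMul: rounding doubling high multiply of two Q31 values.
int32_t rounding_doubling_high_mul(int32_t a, int32_t b)
{
    const int64_t ab    = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int64_t nudge = ab >= 0 ? (1ll << 30) : (1 - (1ll << 30));
    return static_cast<int32_t>((ab + nudge) >> 31);
}

// result_shift: rounding (not truncating) divide by a power of two.
int32_t rounding_divide_by_pow2(int32_t x, int exponent)
{
    const int32_t mask      = (1 << exponent) - 1;
    const int32_t remainder = x & mask;
    const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
    return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

uint8_t quantize_down(int32_t acc, int32_t bias, int32_t multiplier, int shift,
                      int32_t offset_after_shift)
{
    int32_t v = rounding_doubling_high_mul(acc + bias, multiplier);
    v         = rounding_divide_by_pow2(v, shift) + offset_after_shift;
    return static_cast<uint8_t>(std::clamp(v, 0, 255)); // clamp to [min, max]
}
```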
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute gaussian filter 3x3. This function calls the following NEON kernels:
+/** Basic function to execute gaussian filter 3x3. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEGaussian3x3Kernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEGaussian5x5VertKernel;
class NEFillBorderKernel;
-/** Basic function to execute gaussian filter 5x5. This function calls the following NEON kernels:
+/** Basic function to execute gaussian filter 5x5. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEGaussian5x5HorKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Pyramid _tmp;
};
-/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following NEON kernels:
+/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEGaussianPyramidHorKernel
std::vector<std::unique_ptr<NEGaussianPyramidVertKernel>> _vertical_reduction;
};
-/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following NEON kernels and functions:
+/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following Neon kernels and functions:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEGaussian5x5
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEHOGOrientationBinningKernel;
class NEHOGBlockNormalizationKernel;
-/** Basic function to calculate HOG descriptor. This function calls the following NEON kernels:
+/** Basic function to calculate HOG descriptor. This function calls the following Neon kernels:
*
* -# @ref NEHOGGradient
* -# @ref NEHOGOrientationBinningKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
class ITensorInfo;
-/** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel:
+/** Basic function to execute HOG detector based on linear SVM. This function calls the following Neon kernel:
*
* -# @ref NEHOGDetectorKernel
*
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
class ICPPKernel;
-/** Basic function to calculate the gradient for HOG. This function calls the following NEON kernels:
+/** Basic function to calculate the gradient for HOG. This function calls the following Neon kernels:
*
* -# @ref NEDerivative
* -# NEMagnitudePhaseKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEHOGOrientationBinningKernel;
class NEHOGBlockNormalizationKernel;
-/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels:
+/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following Neon kernels:
*
* -# @ref NEHOGGradient
* -# @ref NEHOGOrientationBinningKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class INEHarrisScoreKernel;
using IImage = ITensor;
-/** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions:
+/** Basic function to execute harris corners detection. This function calls the following Neon kernels and functions:
*
* -# @ref NESobel3x3 (if gradient_size == 3) or<br/>
* @ref NESobel5x5 (if gradient_size == 5) or<br/>
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/** Basic function to run @ref NELSTMLayerQuantized
*
- * This function calls the following NEON functions/kernels:
+ * This function calls the following Neon functions/kernels:
*
* -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
* -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute laplacian pyramid. This function calls the following NEON kernels and functions:
+/** Basic function to execute laplacian pyramid. This function calls the following Neon kernels and functions:
*
* -# @ref NEGaussianPyramidHalf
* -# @ref NEGaussian5x5
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
using IImage = ITensor;
-/** Basic function to execute laplacian reconstruction. This function calls the following NEON kernels and functions:
+/** Basic function to execute laplacian reconstruction. This function calls the following Neon kernels and functions:
*
* -# @ref NEArithmeticAddition
* -# @ref NEScale
class NEFill;
class NEMaxUnpoolingLayerKernel;
-/** Function to perform MaxUnpooling. This function calls the following NEON kernels:
+/** Function to perform MaxUnpooling. This function calls the following Neon kernels:
*
* -# @ref NEFill
* -# @ref NEMaxUnpoolingLayerKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEMeanStdDevKernel;
class NEFillBorderKernel;
-/** Basic function to execute mean and std deviation. This function calls the following NEON kernels:
+/** Basic function to execute mean and std deviation. This function calls the following Neon kernels:
*
* @ref NEMeanStdDevKernel
*
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute median filter. This function calls the following NEON kernels:
+/** Basic function to execute median filter. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEMedian3x3Kernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEMinMaxLocationKernel;
using IImage = ITensor;
-/** Basic function to execute min and max location. This function calls the following NEON kernels:
+/** Basic function to execute min and max location. This function calls the following Neon kernels:
*
* -# NEMinMaxKernel
* -# NEMinMaxLocationKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute non linear filter. This function calls the following NEON kernels:
+/** Basic function to execute non linear filter. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NENonLinearFilterKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following NEON kernels:
+/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NENonMaximaSuppression3x3Kernel
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
class NENormalizationLayerKernel;
-/** Basic function to compute a normalization layer. This function calls the following NEON kernels:
+/** Basic function to compute a normalization layer. This function calls the following Neon kernels:
*
* -# @ref NEPixelWiseMultiplication
* -# @ref NEFillBorderKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/** Array of LK Internal Keypoints */
using LKInternalKeypointArray = Array<NELKInternalKeypoint>;
-/** Basic function to execute optical flow. This function calls the following NEON kernels and functions:
+/** Basic function to execute optical flow. This function calls the following Neon kernels and functions:
*
* -# @ref NEScharr3x3
* -# @ref NELKTrackerKernel
{
class NEPadLayerKernel;
-/** Basic function to pad a tensor. This function calls the following NEON functions/kernels:
+/** Basic function to pad a tensor. This function calls the following Neon functions/kernels:
*
* - For padding mode = PaddingMode::CONSTANT:
* -# @ref NEPadLayerKernel
NEPermute &operator=(const NEPermute &) = delete;
/** Default move assignment operator */
NEPermute &operator=(NEPermute &&);
- /** Configure the permute NEON kernel
+ /** Configure the permute Neon kernel
*
* @note Arbitrary permutation vectors are supported with rank not greater than 4
*
class ITensor;
class ITensorInfo;
-/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels:
+/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if padding size is different from zero)
* -# @ref cpu::kernels::CpuPoolingKernel
/** Basic function to run @ref NEQLSTMLayer
*
- * This function calls the following NEON functions/kernels:
+ * This function calls the following Neon functions/kernels:
*
* -# @ref NEActivationLayer Activation functions (tanh and logistic)
* -# @ref NEArithmeticAddition Elementwise addition
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
class ITensorInfo;
-/** Basic function to simulate a quantization layer. This function calls the following NEON kernels:
+/** Basic function to simulate a quantization layer. This function calls the following Neon kernels:
*
*
* -# @ref NEQuantizationLayerKernel
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/** Basic function to run @ref NEROIAlignLayerKernel.
*
- * This function calls the following NEON kernels:
+ * This function calls the following Neon kernels:
* -# @ref NEROIAlignLayerKernel
*
*/
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/** Basic function to run @ref NEROIPoolingLayerKernel.
*
- * This function calls the following NEON kernels:
+ * This function calls the following Neon kernels:
* -# @ref NEROIPoolingLayerKernel
*
*/
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
class NEReductionOperationKernel;
-/** Basic function to simulate a reduction operation. This function calls the following NEON kernels:
+/** Basic function to simulate a reduction operation. This function calls the following Neon kernels:
*
* -# @ref NEReshapeLayer
* -# @ref NEReductionOperationKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute remap. This function calls the following NEON kernels:
+/** Basic function to execute remap. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NERemapKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute scharr 3x3 filter. This function calls the following NEON kernels:
+/** Basic function to execute scharr 3x3 filter. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NEScharr3x3Kernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Basic function to execute sobel 3x3 filter. This function calls the following NEON kernels:
+/** Basic function to execute sobel 3x3 filter. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NESobel3x3Kernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NESobel5x5VertKernel;
class NEFillBorderKernel;
-/** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels:
+/** Basic function to execute sobel 5x5 filter. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NESobel5x5HorKernel
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NESobel7x7VertKernel;
class NEFillBorderKernel;
-/** Basic function to execute sobel 7x7 filter. This function calls the following NEON kernels:
+/** Basic function to execute sobel 7x7 filter. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
* -# @ref NESobel7x7HorKernel
class NESpaceToBatchLayerKernel;
class NEFill;
-/** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions:
+/** Basic function to spatially divide a tensor. This function calls the following Neon kernels/functions:
*
* -# @ref NEFill
* -# @ref NESpaceToBatchLayerKernel
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensorInfo;
class NESpaceToDepthLayerKernel;
-/** This function calls the following NEON kernels/functions:
+/** This function calls the following Neon kernels/functions:
*
* -# @ref NESpaceToDepthLayerKernel
*/
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
class ITensorInfo;
-/** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel:
+/** Basic function to transpose a matrix on Neon. This function calls the following Neon kernel:
*
* -# @ref NETransposeKernel
*
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
class ICPPKernel;
-/** Basic function to simulate a convolution layer. This function calls the following NEON kernels:
+/** Basic function to simulate a convolution layer. This function calls the following Neon kernels:
* -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method )
* -# @ref NEWinogradLayerTransformInputKernel
* -# @ref NEWinogradLayerTransformOutputKernel
@tableofcontents
-The Computer Vision and Machine Learning library is a set of functions optimised for both ARM CPUs and GPUs using SIMD technologies.
+The Computer Vision and Machine Learning library is a set of functions optimised for both Arm CPUs and GPUs using SIMD technologies.
Several builds of the library are available using various configurations:
- - OS: Linux, Android, macOS or bare metal.
- - Architecture: armv7a (32bit) or arm64-v8a (64bit)
- - Technology: NEON / OpenCL / GLES_COMPUTE / NEON and OpenCL and GLES_COMPUTE
+ - OS: Android or Linux.
+ - Architecture: armv7a (32bit) or arm64-v8a (64bit).
+ - Technology: Neon / OpenCL / Neon and OpenCL.
- Debug / Asserts / Release: Use a build with asserts enabled to debug your application and enable extra validation. Once you are sure your application works as expected you can switch to a release build of the library for maximum performance.
@section S0_1_contact Contact / Support
- @ref CLLogicalNot
- @ref CLLogicalAnd
- @ref CLLogicalOr
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- @ref NELogicalNot
- @ref NELogicalAnd
- @ref NELogicalOr
- - Removed padding from NEON kernels:
+ - Removed padding from Neon kernels:
- @ref NEComplexPixelWiseMultiplicationKernel
- @ref NENonMaximaSuppression3x3Kernel
- @ref NERemapKernel
- CLWarpAffineKernel
- CLWarpPerspective
- CLWarpPerspectiveKernel
- - Deprecated NEON kernels / functions (If a kernel is used only by the function that is being deprecated, the kernel is deprecated together):
+ - Deprecated Neon kernels / functions (If a kernel is used only by the function that is being deprecated, the kernel is deprecated together):
- NELocallyConnectedLayer
- NELocallyConnectedMatrixMultiplyKernel
- NEAbsoluteDifference
- @ref CLScaleKernel
- New OpenCL kernels / functions:
- @ref CLMaxUnpoolingLayerKernel
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- @ref NEMaxUnpoolingLayerKernel
- New graph example:
- graph_yolov3_output_detector
- Removed OpenCL kernels / functions:
- CLGEMMLowpQuantizeDownInt32ToUint8Scale
- CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat
- - Removed NEON kernels / functions:
+ - Removed Neon kernels / functions:
- NEGEMMLowpQuantizeDownInt32ToUint8Scale
- NEGEMMMatrixAccumulateBiasesKernel
- Deprecated functions / interfaces:
- New OpenCL kernels / functions:
- @ref CLQLSTMLayer
- @ref CLQLSTMLayerNormalizationKernel
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- @ref NEQLSTMLayer
- @ref NEQLSTMLayerNormalizationKernel
- Added HARD_SWISH support in:
- Deprecated OpenCL kernels / functions:
- CLGEMMLowpQuantizeDownInt32ToUint8Scale
- CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat
- - Deprecated NEON kernels / functions:
+ - Deprecated Neon kernels / functions:
- NEGEMMLowpQuantizeDownInt32ToUint8Scale
- Removed CPP kernels / functions:
- CPPFlipWeightsKernel
- Removed PoolingLayerInfo constructors without Data Layout.
- Removed CLDepthwiseConvolutionLayer3x3
- Removed NEDepthwiseConvolutionLayerOptimized
- - Added support for Winograd 3x3,4x4 on NEON FP16:
+ - Added support for Winograd 3x3,4x4 on Neon FP16:
- @ref NEWinogradConvolutionLayer
- @ref NEWinogradLayerTransformInputKernel
- @ref NEWinogradLayerTransformOutputKernel
- @ref NEWinogradLayerTransformWeightsKernel
- Added CLCompileContext
- - Added NEON GEMM kernel with 2D window support
+ - Added Neon GEMM kernel with 2D window support
v20.02.1 Maintenance release
- Added Android-NN build script.
- New OpenCL kernels / functions:
- @ref CLFill
- CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel / @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- @ref NEFill
- @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel / @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
- - Deprecated NEON functions / interfaces:
+ - Deprecated Neon functions / interfaces:
- CLDepthwiseConvolutionLayer3x3
- NEDepthwiseConvolutionLayerOptimized
- PoolingLayerInfo constructors without Data Layout.
- - Added support for quantization with multiplier greater than 1 on NEON and CL.
+ - Added support for quantization with multiplier greater than 1 on Neon and CL.
- Added support for quantized inputs of type QASYMM8_SIGNED and QASYMM8 to @ref CLQuantizationLayer.
- Added the ability to build bootcode for bare metal.
- Added support for generating synthetic QASYMM8 graphs.
- CLDepthwiseSeparableConvolutionLayer
- CLDepthwiseVectorToTensorKernel
- CLDirectConvolutionLayerOutputStageKernel
- - Deprecated NEON kernels / functions:
+ - Deprecated Neon kernels / functions:
- NEDepthwiseWeightsReshapeKernel
- NEDepthwiseIm2ColKernel
- NEDepthwiseSeparableConvolutionLayer
- @ref CLDepthwiseConvolutionLayerNativeKernel to replace the old generic depthwise convolution (see Deprecated
OpenCL kernels / functions)
- @ref CLLogSoftmaxLayer
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- @ref NEBoundingBoxTransformKernel / @ref NEBoundingBoxTransform
- @ref NEComputeAllAnchorsKernel / NEComputeAllAnchors
- @ref NEDetectionPostProcessLayer
- Replaced the calls to CLCopyKernel and CLMemsetKernel with @ref CLPadLayer in @ref CLGenerateProposalsLayer.
- Improved performance for CL Inception V3 - FP16.
- Improved accuracy for CL Inception V3 - FP16 by enabling FP32 accumulator (mixed-precision).
- - Improved NEON performance by enabling fusing batch normalization with convolution and depth-wise convolution layer.
- - Improved NEON performance for MobileNet-SSD by improving the output detection performance.
+ - Improved Neon performance by enabling fusing batch normalization with convolution and depth-wise convolution layer.
+ - Improved Neon performance for MobileNet-SSD by improving the output detection performance.
- Optimized @ref CLPadLayer.
- Optimized CL generic depthwise convolution layer by introducing @ref CLDepthwiseConvolutionLayerNativeKernel.
- Reduced memory consumption by implementing weights sharing.
v19.08 Public major release
- Various bug fixes.
- Various optimisations.
- - Deprecated NEON functions
+ - Deprecated Neon functions
- NEDepthConcatenateLayer
- NEWidthConcatenateLayer
- Deprecated OpenCL kernels / functions
- CLGEMMInterleave4x4Kernel / CLGEMMInterleave4x4
- CLGEMMTranspose1xWKernel / CLGEMMTranspose1xW
- CLWidthConcatenateLayer
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- @ref NEAbsLayer
- @ref NECast
- @ref NEElementwisePower
- Added support for REDUCE_MIN and REDUCE_MAX in @ref ReductionOperation
- Enable the fusion of batch normalization with convolution and depthwise convolution layer for FP32 in the graph API (OpenCL only)
- Added support for fusing activation function and broadcast addition with the matrix multiplication for FP32 (OpenCL only)
- - Re-factored the depthwise convolution layer kernel on NEON for generic cases
- - Added an optimized depthwise convolution layer kernel for 5x5 filters (NEON only)
+ - Re-factored the depthwise convolution layer kernel on Neon for generic cases
+ - Added an optimized depthwise convolution layer kernel for 5x5 filters (Neon only)
- Added support to enable OpenCL kernel cache. Added example showing how to load the prebuilt OpenCL kernels from a binary cache file
- Altered @ref QuantizationInfo interface to support per-channel quantization.
- The CLDepthwiseConvolutionLayer3x3 will be included by @ref CLDepthwiseConvolutionLayer to accommodate for future optimizations.
- The NEDepthwiseConvolutionLayerOptimized will be included by @ref NEDepthwiseConvolutionLayer to accommodate for future optimizations.
- Removed inner_border_right and inner_border_top parameters from @ref CLDeconvolutionLayer interface
- Removed inner_border_right and inner_border_top parameters from @ref NEDeconvolutionLayer interface
- - Optimized the NEON assembly kernel for GEMMLowp. The new implementation fuses the output stage and quantization with the matrix multiplication kernel
+ - Optimized the Neon assembly kernel for GEMMLowp. The new implementation fuses the output stage and quantization with the matrix multiplication kernel
v19.05 Public major release
- Various bug fixes.
- Add support for QASYMM8 in NEArithmeticSubtractionKernel.
- Add support for QASYMM8 in NEPixelWiseMultiplicationKernel.
- Add support for QASYMM8 NEDeconvolution.
- - Add support for DequantizationLayer for NEON/CL.
+ - Add support for DequantizationLayer for Neon/CL.
- Add support for dilation in CLDepthwiseConvolution.
- Fuse offset contribution with the output stage when we use NEGEMMLowpMatrixMultiplyCore.
- Optimize CLDeconvolution.
- @ref NESoftmaxLayer
- Fused activation in @ref CLWinogradConvolutionLayer
- Extended @ref NEPermute to support more cases
- - Added NEON/SVE GEMM Hybrid kernels
+ - Added Neon/SVE GEMM Hybrid kernels
- Added u8 and s8 hybrid assembly kernels
- Introduced GEMM strategy name in NEGEMMAssemblyWrapper
- Improved @ref CLTuner
- Removed arm_compute::NEGEMMLowpAArch64A53Kernel / arm_compute::NEGEMMLowpAArch64Kernel / arm_compute::NEGEMMLowpAArch64V8P4Kernel / arm_compute::NEGEMMInterleavedBlockedKernel / arm_compute::NEGEMMLowpAssemblyMatrixMultiplyCore / arm_compute::NEHGEMMAArch64FP16Kernel
- Added NEGEMMAssemblyWrapper and AssemblyKernelGlue which are used to execute assembly kernels in Neon functions.
- Minor changes to the CPUInfo type to make it compatible with the new assembly gemm interface.
- - Moved neon assembly kernels to the folder src/core/NEON/kernels/arm_gemm.
+ - Moved Neon assembly kernels to the folder src/core/NEON/kernels/arm_gemm.
- Improved doxygen documentation.
- Improved memory management for layer's transitions.
- Added support for NHWC data layout in tensors.
- Port mobilenet example to NHWC data layout.
- Enabled Winograd method in @ref CLConvolutionLayer.
- Renamed NEWinogradLayer to @ref NEWinogradConvolutionLayer.
- - Updated @ref NEWinogradConvolutionLayer to use highly optimised assembly kernels in src/core/NEON/kernels/arm_gemm.
+ - Updated @ref NEWinogradConvolutionLayer to use highly optimised assembly kernels in src/core/NEON/kernels/arm_gemm.
- Added memory manager support in GLES functions.
- Major refactoring of the graph API.
- Added GLES backend in the graph API.
- Replaced NEDeconvolutionLayerUpsampleKernel with @ref NEScaleKernel in @ref NEDeconvolutionLayer.
- Added fast maths flag in @ref CLConvolutionLayer.
- Added new tests and benchmarks in validation and benchmark frameworks
- - Merge Activation layer with Convolution Layer (NEON. CL, GLES)
+ - Merge Activation layer with Convolution Layer (Neon, CL, GLES)
- Added support to OpenCL 2.0 SVM
- Added support to import memory in OpenCL tensors.
- Added the prepare() method to perform any one off pre-processing before running the function.
- Renamed NEWinogradLayer.cpp to @ref NEWinogradConvolutionLayer
v18.02 Public major release
- - Various NEON / OpenCL / GLES optimisations.
+ - Various Neon / OpenCL / GLES optimisations.
- Various bug fixes.
- Changed default number of threads on big.LITTLE systems.
- Refactored examples and added:
- Added support for non-square pooling to @ref NEPoolingLayer and @ref CLPoolingLayer
- New OpenCL kernels / functions:
- CLDirectConvolutionLayerOutputStageKernel
- - New NEON kernels / functions
+ - New Neon kernels / functions
- Added name() method to all kernels.
- Added support for Winograd 5x5.
- NEPermuteKernel / @ref NEPermute
- @ref GCGEMMInterleave4x4Kernel
- @ref GCGEMMTranspose1xWKernel
- @ref GCIm2ColKernel
- - Refactored NEON Winograd (NEWinogradLayerKernel)
+ - Refactored Neon Winograd (NEWinogradLayerKernel)
- Added @ref NEDirectConvolutionLayerOutputStageKernel
- - Added QASYMM8 support to the following NEON kernels:
+ - Added QASYMM8 support to the following Neon kernels:
- NEDepthwiseConvolutionLayer3x3Kernel
- @ref NEFillBorderKernel
- NEPoolingLayerKernel
- Introduced logging interface
- Introduced opencl timer
- Reworked GEMMLowp interface
- - Added new NEON assembly kernels for GEMMLowp, SGEMM and HGEMM
+ - Added new Neon assembly kernels for GEMMLowp, SGEMM and HGEMM
- Added validation method for most Machine Learning kernels / functions
- Added new graph examples such as googlenet, mobilenet, squeezenet, vgg16 and vgg19
- Added sgemm example for OpenCL
- @ref GCLogits1DMaxKernel / @ref GCLogits1DShiftExpSumKernel / @ref GCLogits1DNormKernel / @ref GCSoftmaxLayer
- @ref GCTransposeKernel / @ref GCTranspose
- - New NEON kernels / functions
+ - New Neon kernels / functions
- arm_compute::NEGEMMLowpAArch64A53Kernel / arm_compute::NEGEMMLowpAArch64Kernel / arm_compute::NEGEMMLowpAArch64V8P4Kernel / arm_compute::NEGEMMInterleavedBlockedKernel / arm_compute::NEGEMMLowpAssemblyMatrixMultiplyCore
- arm_compute::NEHGEMMAArch64FP16Kernel
- NEDepthwiseConvolutionLayer3x3Kernel / NEDepthwiseIm2ColKernel / NEGEMMMatrixVectorMultiplyKernel / NEDepthwiseVectorToTensorKernel / @ref NEDepthwiseConvolutionLayer
- @ref CLGEMMLowpOffsetContributionKernel / @ref CLGEMMLowpMatrixAReductionKernel / @ref CLGEMMLowpMatrixBReductionKernel / @ref CLGEMMLowpMatrixMultiplyCore
- CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel / @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
- - New graph nodes for NEON and OpenCL
+ - New graph nodes for Neon and OpenCL
- graph::BranchLayer
- graph::DepthConvertLayer
- graph::DepthwiseConvolutionLayer
- Experimental Graph support: initial implementation of a simple stream API to easily chain machine learning layers.
- Memory Manager (@ref BlobLifetimeManager, @ref BlobMemoryPool, @ref ILifetimeManager, @ref IMemoryGroup, @ref IMemoryManager, @ref IMemoryPool, @ref IPoolManager, @ref MemoryManagerOnDemand, @ref PoolManager)
- New validation and benchmark frameworks (Boost and Google frameworks replaced by homemade framework).
- - Most machine learning functions support both fixed point 8 and 16 bit (QS8, QS16) for both NEON and OpenCL.
- - New NEON kernels / functions:
+ - Most machine learning functions support both fixed point 8 and 16 bit (QS8, QS16) for both Neon and OpenCL.
+ - New Neon kernels / functions:
- arm_compute::NEGEMMAssemblyBaseKernel arm_compute::NEGEMMAArch64Kernel
- @ref NEDequantizationLayerKernel / @ref NEDequantizationLayer
- NEFloorKernel / @ref NEFloor
v17.06 Public major release
- Various bug fixes
- - Added support for fixed point 8 bit (QS8) to the various NEON machine learning kernels.
+ - Added support for fixed point 8 bit (QS8) to the various Neon machine learning kernels.
- Added unit tests and benchmarks (AlexNet, LeNet)
- Added support for sub tensors.
- Added infrastructure to provide GPU specific optimisation for some OpenCL kernels.
- - Added @ref OMPScheduler (OpenMP) scheduler for NEON
- - Added @ref SingleThreadScheduler scheduler for NEON (For bare metal)
+ - Added @ref OMPScheduler (OpenMP) scheduler for Neon
+ - Added @ref SingleThreadScheduler scheduler for Neon (For bare metal)
- Users can specify their own scheduler by implementing the @ref IScheduler interface.
- New OpenCL kernels / functions:
- @ref CLBatchNormalizationLayerKernel / @ref CLBatchNormalizationLayer
- @ref CLWeightsReshapeKernel / @ref CLConvolutionLayerReshapeWeights
- New C++ kernels:
- @ref CPPDetectionWindowNonMaximaSuppressionKernel
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- @ref NEBatchNormalizationLayerKernel / @ref NEBatchNormalizationLayer
- NEDepthConcatenateLayerKernel / NEDepthConcatenateLayer
- @ref NEDirectConvolutionLayerKernel / @ref NEDirectConvolutionLayer
v17.03.1 First Major public release of the sources
- Renamed the library to arm_compute
- - New CPP target introduced for C++ kernels shared between NEON and CL functions.
+ - New CPP target introduced for C++ kernels shared between Neon and CL functions.
- New padding calculation interface introduced and ported most kernels / functions to use it.
- New OpenCL kernels / functions:
- CLGEMMLowpMatrixMultiplyKernel / CLGEMMLowp
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- @ref NENormalizationLayerKernel / @ref NENormalizationLayer
- @ref NETransposeKernel / @ref NETranspose
- NELogits1DMaxKernel, NELogits1DShiftExpSumKernel, NELogits1DNormKernel / @ref NESoftmaxLayer
- @ref CLLKTrackerInitKernel, @ref CLLKTrackerStage0Kernel, @ref CLLKTrackerStage1Kernel, @ref CLLKTrackerFinalizeKernel / @ref CLOpticalFlow
- @ref CLNormalizationLayerKernel / @ref CLNormalizationLayer
- @ref CLLaplacianPyramid, @ref CLLaplacianReconstruct
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- NEActivationLayerKernel / @ref NEActivationLayer
- GEMM refactoring + FP16 support (Requires armv8.2 CPU): @ref NEGEMMInterleave4x4Kernel, @ref NEGEMMTranspose1xWKernel, @ref NEGEMMMatrixMultiplyKernel, @ref NEGEMMMatrixAdditionKernel / @ref NEGEMM
- NEPoolingLayerKernel / @ref NEPoolingLayer
- @ref CLGaussianPyramidHorKernel, @ref CLGaussianPyramidVertKernel / @ref CLGaussianPyramid, @ref CLGaussianPyramidHalf, @ref CLGaussianPyramidOrb
- @ref CLMinMaxKernel, @ref CLMinMaxLocationKernel / @ref CLMinMaxLocation
- @ref CLNonLinearFilterKernel / @ref CLNonLinearFilter
- - New NEON FP16 kernels (Requires armv8.2 CPU)
+ - New Neon FP16 kernels (Requires armv8.2 CPU)
- @ref NEAccumulateWeightedFP16Kernel
- @ref NEBox3x3FP16Kernel
- @ref NENonMaximaSuppression3x3FP16Kernel
- @ref CLDerivativeKernel / @ref CLChannelExtract
- @ref CLFastCornersKernel / @ref CLFastCorners
- @ref CLMeanStdDevKernel / @ref CLMeanStdDev
- - New NEON kernels / functions:
+ - New Neon kernels / functions:
- HOG / SVM: @ref NEHOGOrientationBinningKernel, @ref NEHOGBlockNormalizationKernel, @ref NEHOGDetectorKernel, NEHOGNonMaximaSuppressionKernel / @ref NEHOGDescriptor, @ref NEHOGDetector, @ref NEHOGGradient, @ref NEHOGMultiDetection
- @ref NENonLinearFilterKernel / @ref NENonLinearFilter
- Introduced a CLScheduler to manage the default context and command queue used by the runtime library and create synchronisation events.
@b arch: The x86_32 and x86_64 targets can only be used with neon=0 and opencl=1.
@b os: Choose the operating system you are targeting: Linux, Android or bare metal.
-@note bare metal can only be used for NEON (not OpenCL), only static libraries get built and NEON's multi-threading support is disabled.
+@note Bare metal can only be used with Neon (not OpenCL); only static libraries get built, and Neon's multi-threading support is disabled.
@b build: you can either build directly on your device (native) or cross compile from your desktop machine (cross-compile). In both cases make sure the compiler is available in your path.
-@note If you want to natively compile for 32bit on a 64bit ARM device running a 64bit OS then you will have to use cross-compile too.
+@note If you want to natively compile for 32bit on a 64bit Arm device running a 64bit OS then you will have to use cross-compile too.
There is also an 'embed_only' option which will generate all the .embed files for the OpenCL kernels and / or OpenGLES compute shaders. This might be useful if using a different build system to compile the library.
@b Werror: If you are compiling using the same toolchains as the ones used in this guide then there shouldn't be any warnings, and therefore you should be able to keep Werror=1. If the library fails to build with a different compiler version because of warnings interpreted as errors then, if you are sure the warnings are not important, you might want to try to build with Werror=0 (but please do report the issue on GitHub).
-@b opencl / @b neon / @b gles_compute: Choose which SIMD technology you want to target. (NEON for ARM Cortex-A CPUs or OpenCL / GLES_COMPUTE for ARM Mali GPUs)
+@b opencl / @b neon / @b gles_compute: Choose which SIMD technology you want to target. (Neon for Arm Cortex-A CPUs or OpenCL / GLES_COMPUTE for Arm Mali GPUs)
@b embed_kernels: For OpenCL / GLES_COMPUTE only: set embed_kernels=1 if you want the OpenCL / GLES_COMPUTE kernels to be built in the library's binaries instead of being read from separate ".cl" / ".cs" files. If embed_kernels is set to 0 then the application can set the path to the folder containing the OpenCL / GLES_COMPUTE kernel files by calling CLKernelLibrary::init() / GCKernelLibrary::init(). By default the path is set to "./cl_kernels" / "./cs_shaders".
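For instance, a hedged sketch of pointing the runtime at on-disk OpenCL kernels when built with embed_kernels=0 (the path below is the documented default, used here as an assumption):
@code{.cpp}
// Hedged sketch: initialise the OpenCL kernel library with an on-disk path
// when the kernels were not embedded at build time.
CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault());
@endcode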
@b mali: Enable the collection of Mali hardware counters to measure execution time in benchmark tests. (Your device needs to have a Mali driver that supports it)
-@b openmp Build in the OpenMP scheduler for NEON.
+@b openmp Build in the OpenMP scheduler for Neon.
@note Only works when building with g++, not clang++
-@b cppthreads Build in the C++11 scheduler for NEON.
+@b cppthreads Build in the C++11 scheduler for Neon.
@sa Scheduler::set
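The active scheduler can also be selected programmatically at runtime; a hedged sketch, assuming the corresponding scheduler support was compiled in:
@code{.cpp}
// Hedged sketch: pick the scheduler built in via openmp=1 / cppthreads=1.
Scheduler::set(Scheduler::Type::OMP); // or Type::CPP, or Type::ST for single-threaded
Scheduler::get().set_num_threads(4);  // optional: cap the number of worker threads
@endcode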
│  ├── CL
│  ├── datasets
│  ├── fixtures
- │  └── NEON
+ │  └── Neon
└── validation
  ├── CL
  ├── datasets
  ├── fixtures
-   └── NEON
+   └── Neon
Then, build the library with `external_tests_dir=<PATH_TO_EXTERNAL_TESTS_DIR>`.
- gcc-linaro-6.3.1-2017.05-x86_64_arm-linux-gnueabihf
- gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu
-To cross-compile the library in debug mode, with NEON only support, for Linux 32bit:
+To cross-compile the library in debug mode, with Neon only support, for Linux 32bit:
scons Werror=1 -j8 debug=1 neon=1 opencl=0 os=linux arch=armv7a
scons Werror=1 -j8 debug=0 asserts=1 neon=0 opencl=0 gles_compute=1 embed_kernels=1 os=linux arch=arm64-v8a
-You can also compile the library natively on an ARM device by using <b>build=native</b>:
+You can also compile the library natively on an Arm device by using <b>build=native</b>:
scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=arm64-v8a build=native
scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a build=native
-@note g++ for ARM is mono-arch, therefore if you want to compile for Linux 32bit on a Linux 64bit platform you will have to use a cross compiler.
+@note g++ for Arm is mono-arch; therefore, if you want to compile for Linux 32bit on a Linux 64bit platform, you will have to use a cross compiler.
For example on a 64bit Debian based system you would have to install <b>g++-arm-linux-gnueabihf</b>
@note The following command lines assume the arm_compute libraries are present in the current directory or in the system library path. If this is not the case, you can specify the location of the pre-built libraries with the compiler option -L. When building the OpenCL example, the commands below assume that the CL headers are located in the include folder where the command is executed.
-To cross compile a NEON example for Linux 32bit:
+To cross compile a Neon example for Linux 32bit:
arm-linux-gnueabihf-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++14 -mfpu=neon -L. -larm_compute -larm_compute_core -o neon_convolution
-To cross compile a NEON example for Linux 64bit:
+To cross compile a Neon example for Linux 64bit:
aarch64-linux-gnu-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++14 -L. -larm_compute -larm_compute_core -o neon_convolution
@note If compiling using static libraries, this order must be followed when linking: arm_compute_graph_static, arm_compute, arm_compute_core
-To compile natively (i.e directly on an ARM device) for NEON for Linux 32bit:
+To compile natively (i.e. directly on an Arm device) for Neon for Linux 32bit:
g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++14 -mfpu=neon -larm_compute -larm_compute_core -o neon_convolution
-To compile natively (i.e directly on an ARM device) for NEON for Linux 64bit:
+To compile natively (i.e. directly on an Arm device) for Neon for Linux 64bit:
g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++14 -larm_compute -larm_compute_core -o neon_convolution
(notice the only difference from the 32 bit command is that we don't need the -mfpu option)
-To compile natively (i.e directly on an ARM device) for OpenCL for Linux 32bit or Linux 64bit:
+To compile natively (i.e. directly on an Arm device) for OpenCL for Linux 32bit or Linux 64bit:
g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++14 -larm_compute -larm_compute_core -o cl_convolution -DARM_COMPUTE_CL
-To compile natively (i.e directly on an ARM device) for GLES for Linux 32bit or Linux 64bit:
+To compile natively (i.e. directly on an Arm device) for GLES for Linux 32bit or Linux 64bit:
g++ examples/gc_absdiff.cpp utils/Utils.cpp -I. -Iinclude/ -L. -larm_compute -larm_compute_core -std=c++14 -DARM_COMPUTE_GC -Iinclude/linux/ -o gc_absdiff
@subsubsection S3_3_1_library How to build the library?
-To cross-compile the library in debug mode, with NEON only support, for Android 32bit:
+To cross-compile the library in debug mode, with Neon only support, for Android 32bit:
CXX=clang++ CC=clang scons Werror=1 -j8 debug=1 neon=1 opencl=0 os=android arch=armv7a
Once you've got your Android standalone toolchain built and added to your path you can do the following:
-To cross compile a NEON example:
+To cross compile a Neon example:
#32 bit:
arm-linux-androideabi-clang++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++14 -larm_compute-static -larm_compute_core-static -L. -o neon_convolution_arm -static-libstdc++ -pie
For example:
adb shell /data/local/tmp/graph_lenet --help
-In this case the first argument of LeNet (like all the graph examples) is the target (i.e 0 to run on NEON, 1 to run on OpenCL if available, 2 to run on OpenCL using the CLTuner), the second argument is the path to the folder containing the npy files for the weights and finally the third argument is the number of batches to run.
+In this case the first argument of LeNet (like all the graph examples) is the target (i.e. 0 to run on Neon, 1 to run on OpenCL if available, 2 to run on OpenCL using the CLTuner), the second argument is the path to the folder containing the npy files for the weights, and the third argument is the number of batches to run.
@subsection S3_4_macos Building for macOS
@subsubsection S3_5_1_library How to build the library?
-To cross-compile the library with NEON support for baremetal arm64-v8a:
+To cross-compile the library with Neon support for baremetal arm64-v8a:
scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=bare_metal arch=arm64-v8a build=cross_compile cppthreads=0 openmp=0 standalone=1
The Runtime library is a very basic wrapper around the Core library which can be used for quick prototyping. It is basic in the sense that:
- It allocates images and tensors by using standard malloc().
-- It multi-threads NEON code in a very basic way using a very simple pool of threads.
+- It multi-threads Neon code in a very basic way using a very simple pool of threads.
- For OpenCL it uses the default CLScheduler command queue for all mapping operations and kernels.
-For maximum performance, it is expected that the users would re-implement an equivalent to the runtime library which suits better their needs (With a more clever multi-threading strategy, load-balancing between NEON and OpenCL, etc.)
+For maximum performance, it is expected that users will re-implement an equivalent of the runtime library that better suits their needs (with a smarter multi-threading strategy, load-balancing between Neon and OpenCL, etc.).
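As an illustration, a hedged sketch of the typical runtime flow (shape, data type and the activation settings below are illustrative assumptions):
@code{.cpp}
// Hedged sketch of Runtime-library usage: configure, allocate, run.
Tensor src{}, dst{};
src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

NEActivationLayer act{};
act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

// Allocate after configuration so any padding requirements are known
src.allocator()->allocate();
dst.allocator()->allocate();

act.run(); // internally scheduled across the basic thread pool
@endcode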
@section S4_1_2 Data-type and Data-layout support
@section S4_1_3 Fast-math support
Compute Library supports different convolution methods; the fast-math flag is only used for the Winograd algorithm.
-When the fast-math flag is enabled, both NEON and CL convolution layers will try to dispatch the fastest implementation available, which may introduce a drop in accuracy as well. The different scenarios involving the fast-math flag are presented below:
+When the fast-math flag is enabled, both Neon and CL convolution layers will try to dispatch the fastest implementation available, which may introduce a drop in accuracy as well. The different scenarios involving the fast-math flag are presented below:
- For FP32:
- no-fast-math: Only supports Winograd 3x3,3x1,1x3,5x1,1x5,7x1,1x7
- fast-math: Supports Winograd 3x3,3x1,1x3,5x1,1x5,7x1,1x7,5x5,7x7
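For instance, a hedged sketch of opting into fast-math on a Neon convolution layer; the tensors are assumed to be declared and initialised elsewhere, and enable_fast_math is passed explicitly:
@code{.cpp}
// Hedged sketch: request fast-math so faster (possibly less accurate)
// Winograd variants may be selected.
NEConvolutionLayer conv{};
conv.configure(&src, &weights, &bias, &dst,
               PadStrideInfo(1, 1, 1, 1),
               WeightsInfo(),
               Size2D(1U, 1U),
               ActivationLayerInfo(),
               /* enable_fast_math */ true);
@endcode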
q.finish();
@endcode
-NEON / CPP kernels:
+Neon / CPP kernels:
@code{.cpp}
//Create a kernel object:
@subsection S4_2_3 Multi-threading
-The previous section shows how to run a NEON / CPP kernel in the current thread, however if your system has several CPU cores, you will probably want the kernel to use several cores. Here is how this can be done:
+The previous section shows how to run a Neon / CPP kernel in the current thread; however, if your system has several CPU cores, you will probably want the kernel to use several cores. Here is how this can be done:
@code{.cpp}
ThreadInfo info;
}
@endcode
-This is a very basic implementation which was originally used in the NEON runtime library by all the NEON functions.
+This is a very basic implementation which was originally used in the Neon runtime library by all the Neon functions.
@sa CPPScheduler
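For reference, a hedged sketch of the same idea using plain std::thread; the helper below is hypothetical, while @ref CPPScheduler implements it with a persistent thread pool:
@code{.cpp}
// Hypothetical helper: run a configured CPP/Neon kernel across several
// threads by splitting its maximum window along the Y dimension.
void run_parallel(ICPPKernel *kernel, std::size_t num_threads)
{
    std::vector<std::thread> workers;
    for(std::size_t t = 0; t < num_threads; ++t)
    {
        workers.emplace_back([kernel, t, num_threads]()
        {
            ThreadInfo info;
            info.thread_id   = static_cast<int>(t);
            info.num_threads = static_cast<int>(num_threads);
            // Each thread executes a disjoint sub-window of the kernel's window
            kernel->run(kernel->window().split_window(Window::DimY, t, num_threads), info);
        });
    }
    for(auto &w : workers)
    {
        w.join();
    }
}
@endcode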
For example:
@snippet cl_events.cpp OpenCL events
-@subsection S4_4_2_cl_neon OpenCL / NEON interoperability
+@subsection S4_4_2_cl_neon OpenCL / Neon interoperability
-You can mix OpenCL and NEON kernels and functions. However it is the user's responsibility to handle the mapping/unmapping of OpenCL objects, for example:
+You can mix OpenCL and Neon kernels and functions. However, it is the user's responsibility to handle the mapping/unmapping of OpenCL objects, for example:
-@snippet neoncl_scale_median_gaussian.cpp NEON / OpenCL Interop
+@snippet neoncl_scale_median_gaussian.cpp Neon / OpenCL Interop
@sa main_neoncl_scale_median_gaussian
- @ref BorderMode::REPLICATE : Neighbor pixels outside of the image are treated as having the same value as the closest valid pixel.
- @ref BorderMode::CONSTANT : Neighbor pixels outside of the image are treated as having the same constant value. (The user can choose what this value should be).
-Moreover both OpenCL and NEON use vector loads and stores instructions to access the data in buffers, so in order to avoid having special cases to handle for the borders all the images and tensors used in this library must be padded.
+Moreover, both OpenCL and Neon use vector load and store instructions to access the data in buffers, so in order to avoid having special cases to handle the borders, all the images and tensors used in this library must be padded.
@subsubsection padding Padding
The implemented @ref TensorAllocator and @ref CLTensorAllocator objects provide an interface capable of importing existing memory to a tensor as backing memory.
-A simple NEON example can be the following:
+A simple Neon example can be the following:
@code{.cpp}
// External backing memory
void* external_ptr = ...;
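// Hedged continuation of the sketch: a tensor can adopt the external memory
// instead of allocating its own (shape and type here are illustrative assumptions).
Tensor tensor{};
tensor.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
// The caller keeps ownership: external_ptr must outlive the tensor and
// satisfy the allocator's alignment requirements.
tensor.allocator()->import_memory(external_ptr);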
This feature introduces some changes to our API.
All the kernels/functions will now accept a Runtime Context object which will allow the function to use the mentioned services.
-Finally, we will try to adapt our code-base progressively to use the new mechanism but will continue supporting the legacy mechanism to allow a smooth transition. Changes will apply to all our three backends: NEON, OpenCL and OpenGL ES.
+Finally, we will try to adapt our code-base progressively to use the new mechanism but will continue supporting the legacy mechanism to allow a smooth transition. Changes will apply to all our three backends: Neon, OpenCL and OpenGL ES.
*/
} // namespace arm_compute
LD_LIBRARY_PATH=. ./arm_compute_validation --mode=precommit --filter="^CL.*"
-To run the NEON precommit benchmark tests with PMU and Wall Clock timer in miliseconds instruments enabled:
+To run the Neon precommit benchmark tests with the PMU and Wall Clock timer (in milliseconds) instruments enabled:
LD_LIBRARY_PATH=. ./arm_compute_benchmark --mode=precommit --filter="^NEON.*" --instruments="pmu,wall_clock_timer_ms" --iterations=10
@subsection S4_1_2_add_kernel Add a kernel
-As we mentioned at the beginning, the kernel is the implementation of the operator or algorithm partially using a specific programming language related to the backend we want to use. Adding a kernel in the library means implementing the algorithm in a SIMD technology like NEON or OpenCL. All kernels in Compute Library must implement a common interface IKernel or one of the specific subinterfaces.
+As we mentioned at the beginning, a kernel is the backend-specific implementation of an operator or algorithm, written in the programming language or technology of the backend we want to use. Adding a kernel to the library means implementing the algorithm in a SIMD technology like Neon or OpenCL. All kernels in Compute Library must implement the common interface IKernel or one of the specific subinterfaces.
IKernel is the common interface for all the kernels in the core library. It contains the main methods for configuring and running the kernel itself, such as window(), which returns the maximum window the kernel can be executed on, and is_parallelisable(), which indicates whether or not the kernel is parallelizable. If the kernel is parallelizable, the window returned by window() can be split into sub-windows which can then be run in parallel; otherwise, only the window returned by window() can be passed to the run method.
There are specific interfaces for OpenCL and Neon: @ref ICLKernel, INEKernel (using INEKernel = @ref ICPPKernel).
- @ref ICLKernel is the common interface for all the OpenCL kernels. It implements the inherited methods and adds all the methods necessary to configure the CL kernel, such as setting/returning the Local-Workgroup-Size hint, adding single, array or tensor arguments, and setting the targeted GPU architecture according to the CL device. All these methods are used during the configuration and the run of the operator.
-- INEKernel inherits from @ref IKernel as well and it's the common interface for all kernels implemented in NEON, it adds just the run and the name methods.
+- INEKernel also inherits from @ref IKernel and is the common interface for all kernels implemented in Neon; it adds just the run and name methods.
There are two other implementations of @ref IKernel, @ref ICLSimpleKernel and INESimpleKernel; they are the interfaces for simple kernels that have just one input tensor and one output tensor.
Creating a new kernel implies adding new files:
@snippet src/core/gpu/cl/kernels/ClReshapeKernel.cpp ClReshapeKernel Kernel
The run will call the function defined in the .cl file.
-For the NEON backend case:
+For the Neon backend case:
@snippet src/core/cpu/kernels/CpuReshapeKernel.cpp NEReshapeLayerKernel Kernel
-In the NEON case, there is no need to add an extra file and we implement the kernel in the same NEReshapeLayerKernel.cpp file.
+In the Neon case, there is no need to add an extra file: we implement the kernel in the same NEReshapeLayerKernel.cpp file.
If the tests are already in place, the new kernel can be tested using the existing tests by adding the configure and run of the kernel to the compute_target() in the fixture.
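To make the moving parts concrete, here is a hedged, hypothetical skeleton of a Neon kernel (NEFillConstantKernel is not a real library class):
@code{.cpp}
// Hypothetical minimal Neon kernel: fills an F32 tensor with a constant.
class NEFillConstantKernel : public INEKernel
{
public:
    const char *name() const override
    {
        return "NEFillConstantKernel";
    }
    void configure(ITensor *tensor, float value)
    {
        _tensor = tensor;
        _value  = value;
        // The maximum window covers the whole tensor
        INEKernel::configure(calculate_max_window(*tensor->info()));
    }
    void run(const Window &window, const ThreadInfo &info) override
    {
        ARM_COMPUTE_UNUSED(info);
        Iterator it(_tensor, window);
        execute_window_loop(window, [&](const Coordinates &)
        {
            *reinterpret_cast<float *>(it.ptr()) = _value;
        },
        it);
    }

private:
    ITensor *_tensor{ nullptr };
    float    _value{ 0.f };
};
@endcode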
- (sub[n].start() - max[n].start()) % max[n].step() == 0
- (sub[n].end() - sub[n].start()) % max[n].step() == 0
-@ref CPPScheduler::schedule provides a sample implementation that is used for NEON kernels.
-%Memory management is the other aspect that the runtime layer is supposed to handle. %Memory management of the tensors is abstracted using TensorAllocator. Each tensor holds a pointer to a TensorAllocator object, which is used to allocate and free the memory at runtime. The implementation that is currently supported in Compute Library allows memory blocks, required to be fulfilled for a given operator, to be grouped together under a @ref MemoryGroup. Each group can be acquired and released. The underlying implementation of memory groups vary depending on whether NEON or CL is used. The memory group class uses memory pool to provide the required memory. It also uses the memory manager to manage the lifetime and a IPoolManager to manage the memory pools registered with the memory manager.
+@ref CPPScheduler::schedule provides a sample implementation that is used for Neon kernels.
+%Memory management is the other aspect that the runtime layer is supposed to handle. %Memory management of the tensors is abstracted using TensorAllocator. Each tensor holds a pointer to a TensorAllocator object, which is used to allocate and free the memory at runtime. The implementation that is currently supported in Compute Library allows memory blocks, required to be fulfilled for a given operator, to be grouped together under a @ref MemoryGroup. Each group can be acquired and released. The underlying implementation of memory groups varies depending on whether Neon or CL is used. The memory group class uses a memory pool to provide the required memory. It also uses the memory manager to manage the lifetime of the tensors and an IPoolManager to manage the memory pools registered with the memory manager.
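A hedged sketch of how these pieces fit together, with a single pool and a single managed tensor (real code would typically manage several tensors across functions):
@code{.cpp}
// Hedged sketch: a memory group backed by an on-demand memory manager.
auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
auto pool_mgr     = std::make_shared<PoolManager>();
auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

MemoryGroup group(mm);
Tensor      tmp{};
group.manage(&tmp);          // register the tensor's lifetime with the group
// ... init the tensor info and configure the functions that use tmp ...
tmp.allocator()->allocate(); // completes the lifetime; no memory is bound yet

Allocator alloc{};
mm->populate(alloc, 1 /* num_pools */); // create the backing pools once

group.acquire(); // bind pool memory to the group's tensors before running
// ... run the functions ...
group.release(); // hand the memory back to the pool
@endcode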
We have seen the various interfaces for a kernel in the core library; the same file structure exists in the runtime module. IFunction is the base class for all the functions; it has two child interfaces, ICLSimpleFunction and INESimpleFunction, that are used as base classes for functions which call a single kernel.
-The new operator has to implement %validate(), configure() and run(), these methods will call the respective function in the kernel considering that the multi-threading is used for the kernels which are parallelizable, by default std::thread::hardware_concurrency() threads are used. For NEON function can be used CPPScheduler::set_num_threads() to manually set the number of threads, whereas for OpenCL kernels all the kernels are enqueued on the queue associated with CLScheduler and the queue is then flushed.
+The new operator has to implement %validate(), configure() and run(). These methods call the respective function in the kernel; multi-threading is used for the kernels which are parallelizable, and by default std::thread::hardware_concurrency() threads are used. For Neon functions, CPPScheduler::set_num_threads() can be used to manually set the number of threads, whereas for OpenCL all the kernels are enqueued on the queue associated with CLScheduler and the queue is then flushed.
For the runtime functions, there is an extra method implemented: prepare(). This method prepares the function for the run; it performs all the heavy operations that are done only once (reshaping the weights, releasing memory that is no longer needed after the reshape, etc.). The prepare method can be called standalone or during the first run; once it has executed, the function is marked as prepared.
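A hedged sketch of the resulting run()/prepare() contract; MyFunction and its members are hypothetical:
@code{.cpp}
// Hypothetical function showing the one-off prepare() pattern.
void MyFunction::run()
{
    prepare(); // performs the heavy one-off work on the first call only
    MemoryGroupResourceScope scope_mg(_memory_group);
    NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}

void MyFunction::prepare()
{
    if(!_is_prepared)
    {
        _reshape_weights.run();              // e.g. reshape the weights once
        _original_weights->mark_as_unused(); // their memory can now be released
        _is_prepared = true;
    }
}
@endcode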
The files we add are:
@endcode
-For NEON:
+For Neon:
@code{.cpp}
using namespace arm_compute;
@tableofcontents
-@section S6_1 NEON functions
+@section S6_1 Neon functions
- @ref IFunction
- @ref INESimpleFunction
- Mali DDK r1p0 - r8p0, and
- Linux kernel >= 4.4
-- On Android with arm64-v8a/arm64-v8.2-a architecture, NEON validation tests can fail when compiled using Android Ndk
+- On Android with arm64-v8a/arm64-v8.2-a architecture, Neon validation tests can fail when compiled using Android NDK
>= r18b in debug mode (https://github.com/android/ndk/issues/1135).
- Versions Affected: >= v19.11
- OSs Affected: Android
*/
/** @dir src/core/NEON
- * @brief NEON backend core: kernels and utilities.
+ * @brief Neon backend core: kernels and utilities.
*/
/** @file src/core/NEON/NEKernels.h
- * @brief Includes all the NEON kernels at once
+ * @brief Includes all the Neon kernels at once
*/
/** @dir src/core/NEON/kernels
- * @brief Folder containing all the NEON kernels
+ * @brief Folder containing all the Neon kernels
*/
/** @dir arm_compute/core/utils
*/
/** @dir arm_compute/graph/backends/NEON
- * @brief NEON specific operations
+ * @brief Neon specific operations
*/
/** @dir arm_compute/graph/detail
*/
/** @file arm_compute/runtime/CPP/CPPScheduler.h
- * @brief Basic pool of threads to execute CPP/NEON code on several cores in parallel.
+ * @brief Basic pool of threads to execute CPP/Neon code on several cores in parallel.
*/
/** @dir arm_compute/runtime/CPP/functions
*/
/** @dir arm_compute/runtime/NEON
- * @brief NEON backend runtime interface.
+ * @brief Neon backend runtime interface.
*/
/** @file arm_compute/runtime/NEON/NEFunctions.h
- * @brief Includes all the NEON functions at once.
+ * @brief Includes all the Neon functions at once.
*/
/** @dir arm_compute/runtime/NEON/functions
- * @brief Folder containing all the NEON functions.
+ * @brief Folder containing all the Neon functions.
*/
/** @dir arm_compute/runtime/OMP
* -# cl_*.cpp --> OpenCL examples
* -# gc_*.cpp --> GLES compute shaders examples
* -# graph_*.cpp --> Graph examples
- * -# neoncl_*.cpp --> NEON / OpenCL interoperability examples
- * -# neon_*.cpp --> NEON examples
+ * -# neoncl_*.cpp --> Neon / OpenCL interoperability examples
+ * -# neon_*.cpp --> Neon examples
*/
/** @dir examples/gemm_tuner
*/
/** @dir src/core/NEON/wrapper
- * @brief NEON wrapper used to simplify code
+ * @brief Neon wrapper used to simplify code
*/
/** @file src/core/NEON/wrapper/traits.h
- * @brief Traits defined on NEON vectors
+ * @brief Traits defined on Neon vectors
*/
/** @file src/core/NEON/wrapper/wrapper.h
*/
/** @dir src/core/NEON/wrapper/intrinsics
- * @brief NEON intrinsics wrappers
+ * @brief Neon intrinsics wrappers
*/
/** @dir src/core/NEON/wrapper/scalar
*/
/** @dir tests/NEON
- * @brief NEON accessors.
+ * @brief Neon accessors.
*/
/** @dir tests/benchmark
*/
/** @dir tests/benchmark/NEON
- * @brief NEON benchmarking tests.
+ * @brief Neon benchmarking tests.
*/
/** @dir tests/benchmark_examples
*/
/** @dir tests/validation/NEON
- * @brief NEON validation tests.
+ * @brief Neon validation tests.
*/
/** @dir tests/validation/reference
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Tensor out_fc0{};
Tensor out_softmax{};
- // NEON allocator
+ // Neon allocator
Allocator allocator{};
// Memory groups
/*
- * Copyright (c) 2016-2018 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
}
void do_run() override
{
- // Run NEON softmax:
+ // Run Neon softmax:
softmax.run();
}
void do_teardown() override
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
using namespace arm_compute;
using namespace utils;
-/** Example demonstrating how to use both CL and NEON functions in the same pipeline
+/** Example demonstrating how to use both CL and Neon functions in the same pipeline
*
* @param[in] argc Number of arguments
* @param[in] argv Arguments ( [optional] Path to PPM image to process )
public:
bool do_setup(int argc, char **argv) override
{
- /** [NEON / OpenCL Interop] */
+ /** [Neon / OpenCL Interop] */
PPMLoader ppm;
CLScheduler::get().default_init();
ppm.fill_image(src);
const std::string output_filename = std::string(argv[1]) + "_out.ppm";
}
- /** [NEON / OpenCL Interop] */
+ /** [Neon / OpenCL Interop] */
return true;
}
// Enqueue and flush the OpenCL kernel:
scale.run();
- // Do a blocking map of the input and output buffers of the NEON function:
+ // Do a blocking map of the input and output buffers of the Neon function:
scale_median.map();
median_gauss.map();
- // Run the NEON function:
+ // Run the Neon function:
median.run();
// Unmap the output buffer before it's used again by OpenCL:
Target = collections.namedtuple('Target', 'name prefix basepath')
core_targets = [
- Target("NEON", "NE", src_path), # NEON kernels are under src
+ Target("NEON", "NE", src_path), # Neon kernels are under src
Target("CL", "CL", src_path), # CL kernels are under src
Target("CPP", "CPP", armcv_path), # CPP kernels are under arm_compute
Target("GLES_COMPUTE", "GC", armcv_path) # GLES kernels are under arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/** Each bit of the result is set to the corresponding bit of either then_val or
* else_val depending on whether the corresponding bit of if_mask is set.
- * Equivalent to the VBSL instruction in ARM NEON.
+ * Equivalent to the VBSL instruction in Arm Neon.
*
* @param[in] size Size of vector.
*
}
/** Calculates (a+b)/2, rounded to the nearest integer.
- * Equivalent to VRHADD in the ARM NEON instruction set.
+ * Equivalent to VRHADD in the Arm Neon instruction set.
*
* @param[in] size Size of vector.
*
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
-/** Common interface for all kernels implemented in NEON. */
+/** Common interface for all kernels implemented in Neon. */
using INEKernel = ICPPKernel;
} // namespace arm_compute
#endif /*ARM_COMPUTE_INEKERNEL_H */
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
-/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */
+/** Interface for simple Neon kernels having 1 tensor input and 1 tensor output */
using INESimpleKernel = ICPPSimpleKernel;
} // namespace arm_compute
#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */
#ifndef ARM_COMPUTE_NEKERNELS_H
#define ARM_COMPUTE_NEKERNELS_H
-/* Header regrouping all the NEON kernels */
+/* Header regrouping all the Neon kernels */
#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
#include "src/core/NEON/kernels/NEAccumulateKernel.h"
#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
template <typename T, bool fused_activation, typename F>
void NEBatchNormalizationLayerKernel::batch_normalization_nchw(const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
const int window_step_x = 16 / sizeof(T);
F activation_functor(_act_info);
// Hold information about the current feature map we are iterating.
- // Only compute denominator and NEON vectors once per feature map.
+ // Only compute denominator and Neon vectors once per feature map.
int slice = -1;
const auto input_mean = reinterpret_cast<const T *>(_mean->ptr_to_element(Coordinates(0, 0)));
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform a Box 3x3 filter */
+/** Neon kernel to perform a Box 3x3 filter */
class NEBox3x3Kernel : public INESimpleKernel
{
public:
};
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** NEON kernel to perform a Box 3x3 filter for FP16 datatype
+/** Neon kernel to perform a Box 3x3 filter for FP16 datatype
*/
class NEBox3x3FP16Kernel : public NEBox3x3Kernel
{
void run(const Window &window, const ThreadInfo &info) override;
};
#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */
+/** Neon kernel to perform a Box 3x3 filter for FP16 datatype */
using NEBox3x3FP16Kernel = NEBox3x3Kernel;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
} // namespace arm_compute
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
ITensor *_phase; /**< Destination tensor - Quantized phase */
};
-/** NEON kernel to perform Non-Maxima suppression for Canny Edge.
+/** Neon kernel to perform Non-Maxima suppression for Canny Edge.
*
* @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
* to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE.
int32_t _upper_thr; /**< Upper threshold used for the hysteresis */
};
-/** NEON kernel to perform Edge tracing */
+/** Neon kernel to perform Edge tracing */
class NEEdgeTraceKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups)
{
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NCHW, DataLayout::NHWC);
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
{
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
// Validate configured output
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform col2im reshaping.
+/** Neon kernel to perform col2im reshaping.
*
* Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel.
*
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
DataLayout data_layout)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() != 2);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != original_input_shape.total_size_lower(3));
// Forward declarations
class ITensor;
-/** NEON kernel to convert asymmetric signed to asymmetric signed and vice-versa */
+/** Neon kernel to convert asymmetric unsigned to asymmetric signed and vice-versa */
class NEConvertQuantizedSignednessKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Iterator input(_input, win);
Iterator output(_output, win);
- // Load the matrix's coefficients into NEON registers:
+ // Load the matrix's coefficients into Neon registers:
const int16x4_t mat00 = vld1_dup_s16(_convolution.data());
const int16x4_t mat01 = vld1_dup_s16(_convolution.data() + 1);
const int16x4_t mat02 = vld1_dup_s16(_convolution.data() + 2);
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
AccessWindowHorizontal out_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_processed_per_iteration);
AccessWindowHorizontal out_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_processed_per_iteration);
- // TODO(COMPMID-1503) Fix x-access input bug in NEON kernel instead of '+2'
+ // TODO(COMPMID-1503) Fix x-access input bug in Neon kernel instead of '+2'
AccessWindowHorizontal in_x_access(input->info(), -border_size().left, num_elems_processed_per_iteration + 2);
AccessWindowRectangle in_y_access(input->info(), 0, -border_size().left, num_elems_processed_per_iteration, num_rows_read_per_iteration);
- // TODO(COMPMID-1503) Fix x-access input bug in NEON kernel instead of '+2'
+ // TODO(COMPMID-1503) Fix x-access input bug in Neon kernel instead of '+2'
AccessWindowRectangle in_xy_access(input->info(), -border_size().left, -border_size().top, num_elems_processed_per_iteration + 2, num_rows_read_per_iteration);
if(run_der_x && run_der_y)
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
// Optimized convolver for 1x1 kernels used only where input width and height are both <= 8
// For big Z as in Input=7x7x832, this implementation is faster than the general code because it doesn't need to
-// store intermidiate results in memory. Temporary results are stored in NEON registers directly and then written to the output buffer.
+// store intermediate results in memory. Temporary results are stored in Neon registers directly and then written to the output buffer.
template <unsigned int stridex>
class convolver_w1x1_i8x8_f32
{
}
}
}
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON interface for Direct Convolution Layer kernel */
+/** Neon interface for Direct Convolution Layer kernel */
class NEDirectConvolutionLayerKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN);
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
class ITensor;
-/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
+/** Neon kernel to accumulate the biases, if provided, or downscale in case of quantized input.
*
* @note We assume bias to be shared
* @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
using IImage = ITensor;
-/** NEON kernel to perform fast corners */
+/** Neon kernel to perform fast corners */
class NEFastCornersKernel : public INEKernel
{
public:
void NEFillBorderKernel::configure(ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_ERROR_ON(tensor->data_type() == DataType::UNKNOWN);
_border_size = border_size;
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to interleave the elements of a matrix
+/** Neon kernel to interleave the elements of a matrix
*
* This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
*
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to multiply matrices
+/** Neon kernel to multiply matrices
*
* @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel
* This kernel performs the following computation:
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
+/** Neon kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
* and adds to it the offset contribution of matrix A and matrix B in-place.
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel.
+/** Neon kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel.
*
* The computation is performed in-place
*
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
+/** Neon kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
* The following computations will be performed by the kernel:
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
+/** Neon kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value.
* The following computations will be performed by the kernel:
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED
+/** Neon kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value.
* The following computations will be performed by the kernel:
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
+/** Neon kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value.
* The following computations will be performed by the kernel:
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class ITensor;
struct GEMMLowpReductionKernelInfo;
-/** Common interface for all NEON reduction kernels */
+/** Common interface for all Neon reduction kernels */
class INEGEMMLowpReductionKernel : public INEKernel
{
public:
bool _mul_by_scalar;
};
-/** NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
+/** Neon kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
*
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
void run_internal(const Window &window);
};
-/** NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
+/** Neon kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
*
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta:
+/** Neon kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta:
*
* @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size
*
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication
+/** Neon kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication
*
* @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel" and @ref NEGEMMTranspose1xWKernel
* @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
if(output->total_size() != 0)
{
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
// Forward declarations
class ITensor;
-/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
+/** Neon kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
*
* Following an example of how the transposition1xW works when the input data is F32
*
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
// Forward declarations
class ITensor;
-/** Kernel to perform other operation on NEON */
+/** Kernel to perform the gather operation on Neon */
class NEGatherKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform a Gaussian 3x3 filter */
+/** Neon kernel to perform a Gaussian 3x3 filter */
class NEGaussian3x3Kernel : public INESimpleKernel
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */
+/** Neon kernel to perform a Gaussian 5x5 filter (horizontal pass) */
class NEGaussian5x5HorKernel : public INESimpleKernel
{
public:
BorderSize _border_size;
};
-/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */
+/** Neon kernel to perform a Gaussian 5x5 filter (vertical pass) */
class NEGaussian5x5VertKernel : public INESimpleKernel
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform a GaussianPyramid (horizontal pass) */
+/** Neon kernel to perform a GaussianPyramid (horizontal pass) */
class NEGaussianPyramidHorKernel : public INESimpleKernel
{
public:
int _l2_load_offset;
};
-/** NEON kernel to perform a GaussianPyramid (vertical pass) */
+/** Neon kernel to perform a GaussianPyramid (vertical pass) */
class NEGaussianPyramidVertKernel : public INESimpleKernel
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform HOG Orientation Binning */
+/** Neon kernel to perform HOG Orientation Binning */
class NEHOGOrientationBinningKernel : public INEKernel
{
public:
float _phase_scale;
};
-/** NEON kernel to perform HOG block normalization */
+/** Neon kernel to perform HOG block normalization */
class NEHOGBlockNormalizationKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform HOG detector kernel using linear SVM */
+/** Neon kernel to perform HOG detector kernel using linear SVM */
class NEHOGDetectorKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
BorderSize _border_size; /**< Border size */
};
-/** Template NEON kernel to perform Harris Score.
+/** Template Neon kernel to perform Harris Score.
* The implementation supports 3, 5, and 7 for the block_size
*/
template <int32_t block_size>
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized(input->data_type()) && has_bias);
ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Number of groups greater than one are not supported on NEON");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Number of groups greater than one are not supported on Neon");
// Since there's no implicit padding added, check the total input spatial dimensions (with conv paddings) are big enough for the kernel dimensions
const unsigned int width_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
template <typename T, typename AccType = T>
void instance_normalization_nchw(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
// Clear X/Y dimensions on execution window as we handle the planes manually
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Interface for NEON Array of Internal Key Points. */
+/** Interface for Neon Array of Internal Key Points. */
using INELKInternalKeypointArray = IArray<NELKInternalKeypoint>;
/** Interface for the Lucas-Kanade tracker kernel */
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON
+/** Interface to perform Non-Maxima suppression over a 3x3 window using Neon
*
* @note Used by @ref NEFastCorners and @ref NEHarrisCorners
*/
};
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32
+/** Neon kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32
*/
class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel
{
void configure(const ITensor *input, ITensor *output, bool border_undefined);
};
#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */
+/** Neon kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */
using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm>
void NENormalizationLayerKernel::normalize_float(const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
Window win(window);
// Run function
(this->*_func)(window);
}
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to add padding to a tensor
+/** Neon kernel to add padding to a tensor
*
* Add padding given padding information
*/
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform layer normalization */
+/** Neon kernel to perform layer normalization */
class NEQLSTMLayerNormalizationKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
template <typename T>
void range_function(ITensor *output, float start, float step, const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>::tag_type;
const auto step_vec = wrapper::vdup_n(static_cast<T>(step), ExactTagType{});
(*_func)(_output, _start, _step, window);
}
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
template <typename T, int S>
struct RedOpX
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
inline void operator()(const Window &in_window, Window &out_window, const ITensor *in, ITensor *out, const ReductionOperation op)
template <typename T, int S>
struct RedOpYZW
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
using neon_vector = typename wrapper::traits::neon_vector<T, S>::type;
template <typename T, int S, int axis, ReductionOperation op>
struct RedOpYZW_complex
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
using neon_vector = typename wrapper::traits::neon_vector<T, S>::type;
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform a reduction operation
+/** Neon kernel to perform a reduction operation
*
* @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized
* output tensor is signed 32-bit integer (S32). It is the user's responsibility
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform a remap on a tensor */
+/** Neon kernel to perform a remap on a tensor */
class NERemapKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t stride)
{
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, axis);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(axis, 1, DataType::U32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis->num_dimensions() > 1, "Axis must be a 1D tensor");
ARM_COMPUTE_ERROR("Element size not supported");
}
}
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
{
class ITensor;
-/** NEON kernel to perform scaling on a tensor */
+/** Neon kernel to perform scaling on a tensor */
class NEScaleKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(idx_input >= num_tensors);
ARM_COMPUTE_RETURN_ERROR_ON(axis > input->num_dimensions());
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/
+/** Neon kernel to stack a rank-R tensor into one with rank-(R+1) along the axis dimension. */
class NEStackLayerKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
inline void NEThresholdKernel::run_binary(const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using Type = uint8_t;
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<Type, wrapper::traits::BitWidth::W128>;
inline void NEThresholdKernel::run_range(const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using Type = uint8_t;
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<Type, wrapper::traits::BitWidth::W128>;
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform a tile operation */
+/** Neon kernel to perform a tile operation */
class NETileKernel : public INEKernel
{
public:
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
if(output->total_size() != 0)
Iterator output(out, window_out);
- // Run the NEON path if and only if the input is not a row-vector
+ // Run the Neon path if and only if the input is not a row-vector
if(in->info()->dimension(1) != 1)
{
Iterator input(in, window_in);
Iterator output(out, window_out);
- // Run the NEON path if and only if the input is not a row-vector
+ // Run the Neon path if and only if the input is not a row-vector
if(in->info()->dimension(1) != 1)
{
Iterator input(in, window_in);
Iterator output(out, window_out);
- // Run the NEON path if and only if the input is not a row-vector
+ // Run the Neon path if and only if the input is not a row-vector
if(in->info()->dimension(1) != 1)
{
Iterator input(in, window_in);
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel which transposes the elements of a matrix.
+/** Neon kernel which transposes the elements of a matrix.
*
* [width, height, batch] -> [height, width, batch]
*
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
if(biases != nullptr)
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
class ITensor;
-/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer
+/** Neon kernel to perform reshaping on the weights used by convolution and locally connected layer
*
* Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
* In combination with the @ref NEIm2ColKernel, this kernel can transform a convolution into a matrix multiplication.
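To make the convolution-to-GEMM step concrete, a toy single-channel, stride-1, no-padding im2col sketch (illustrative only; the real kernels handle channels, strides, padding and data layouts):
```
#include <cstddef>
#include <vector>

// Each k x k input patch becomes one row; a linearized kernel is then a
// k*k column, and output(y, x) is the dot product of the matching row
// with that column.
std::vector<float> im2col(const std::vector<float> &in, int w, int h, int k)
{
    const int out_w = w - k + 1;
    const int out_h = h - k + 1;
    std::vector<float> cols;
    cols.reserve(static_cast<std::size_t>(out_w) * out_h * k * k);
    for (int y = 0; y < out_h; ++y)
        for (int x = 0; x < out_w; ++x)
            for (int ky = 0; ky < k; ++ky)
                for (int kx = 0; kx < k; ++kx)
                    cols.push_back(in[(y + ky) * w + (x + kx)]); // one patch per row
    return cols;
}
```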
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
// Forward declarations
class ITensor;
-/** Interface for the NEON kernel to perform Winograd input transform. */
+/** Interface for the Neon kernel to perform Winograd input transform. */
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
}
};
-/** NEON kernel to perform Winograd input transform. */
+/** Neon kernel to perform Winograd input transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel
{
ITensor *_workspace;
};
-/** Interface for the NEON kernel to perform Winograd output transform. */
+/** Interface for the Neon kernel to perform Winograd output transform. */
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
}
};
-/** NEON kernel to perform Winograd output transform. */
+/** Neon kernel to perform Winograd output transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel
{
int _num_channels;
};
-/** Interface for the NEON kernel to perform Winograd weights transform. */
+/** Interface for the Neon kernel to perform Winograd weights transform. */
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
};
-/** NEON kernel to perform Winograd weights transform. */
+/** Neon kernel to perform Winograd weights transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel
{
int _num_input_channels;
};
-/** NEON kernel to perform Winograd. */
+/** Neon kernel to perform Winograd. */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerConfiguration
{
},
#endif // __ARM_FEATURE_SVE
-// NEON hybrid methods
+// Neon hybrid methods
{
GemmMethod::GEMM_HYBRID,
"a64_smallK_hybrid_fp32_mla_8x4",
/* AArch32 */
#ifdef __arm__
/* FP32 */
-/* NEON implementation (height 6) */
+/* Neon implementation (height 6) */
template void IndirectInterleave<6, 1, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<6, 1, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<6, 1, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
/* FP16 */
#if __ARM_FP16_ARGS
-/* NEON implementation using FP32 kernel (height 6) */
+/* Neon implementation using FP32 kernel (height 6) */
template void IndirectInterleave<6, 1, VLType::None>(float *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<6, 1, VLType::None>(float *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<6, 1, VLType::None>(float *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
#endif /* __ARM_FP16_ARGS */
/* BF16 */
-/* NEON implementation using FP32 kernel */
+/* Neon implementation using FP32 kernel */
template void IndirectInterleave<6, 1, VLType::None>(float *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<6, 1, VLType::None>(float *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<6, 1, VLType::None>(float *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
/* AArch64 */
#ifdef __aarch64__
/* FP32 */
-/* NEON/SVE implementation (height 8) */
+/* Neon/SVE implementation (height 8) */
template void IndirectInterleave<8, 1, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 1, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(float *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
/* BF16 */
-/* NEON/SVE BFDOT */
+/* Neon/SVE BFDOT */
#ifdef V8P6_BF
template void IndirectInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
#endif // V8P6_BF
-/* NEON/SVE using FP32 kernel */
+/* Neon/SVE using FP32 kernel */
template void IndirectInterleave<8, 1, VLType::None>(float *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 1, VLType::None>(float *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(float *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(uint16_t *, const uint16_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
/* INT8 */
-/* NEON SMLA/SMLAL (height 4, block 16) */
+/* Neon SMLA/SMLAL (height 4, block 16) */
template void IndirectInterleave<4, 16, VLType::None>(int8_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<4, 16, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<4, 16, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-/* NEON SDOT (height 8, block 4) */
+/* Neon SDOT (height 8, block 4) */
template void IndirectInterleave<8, 4, VLType::None>(int8_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
#endif // MMLA_INT8
-/* NEON SDOT (height 8, block 1) */
+/* Neon SDOT (height 8, block 1) */
template void IndirectInterleave<8, 1, VLType::None>(int16_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 1, VLType::None>(int16_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(int16_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-/* NEON SMLA/SMLAL (height 4, block 16) */
+/* Neon SMLA/SMLAL (height 4, block 16) */
template void IndirectInterleave<4, 16, VLType::None>(uint8_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<4, 16, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<4, 16, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-/* NEON SDOT (height 8, block 4) */
+/* Neon SDOT (height 8, block 4) */
template void IndirectInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
#endif // MMLA_INT8
-/* NEON 16-bit (height 8, block 1) */
+/* Neon 16-bit (height 8, block 1) */
template void IndirectInterleave<8, 1, VLType::None>(uint16_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 1, VLType::None>(uint16_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(uint16_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
void batch_normalization(ITensor *src, ITensor *dst, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma,
float epsilon, ActivationLayerInfo &act_info, const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<float16_t, wrapper::traits::BitWidth::W128>;
const int window_step_x = 8;
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
\ No newline at end of file
+#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
void batch_normalization(ITensor *src, ITensor *dst, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma,
float epsilon, ActivationLayerInfo &act_info, const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<float, wrapper::traits::BitWidth::W128>;
const int window_step_x = 4;
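Both kernels above vectorize the same scalar formula (eight FP16 lanes or four FP32 lanes per 128-bit step); a reference version for orientation, with the fused activation left out:
```
#include <cmath>

// out = gamma * (x - mean) / sqrt(var + epsilon) + beta
inline float batch_norm_ref(float x, float mean, float var,
                            float beta, float gamma, float epsilon)
{
    return gamma * (x - mean) / std::sqrt(var + epsilon) + beta;
}
```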
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
template <typename T, int S>
struct dummy
{
- /** NEON vector type. */
+ /** Neon vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
/** Construct a dummy activation object.
template <typename T, int S>
struct linear
{
- /** NEON vector type. */
+ /** Neon vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a Linear activation object.
template <typename T, int S>
struct square
{
- /** NEON vector type. */
+ /** Neon vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a Square activation object.
template <typename T, int S>
struct logistic
{
- /** NEON vector type. */
+ /** Neon vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a Logistic activation object.
template <typename T, int S>
struct relu
{
- /** NEON vector type. */
+ /** Neon vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a RELU activation object.
template <typename T, int S>
struct brelu
{
- /** NEON vector type. */
+ /** Neon vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a bounded RELU activation object.
template <typename T, int S>
struct lubrelu
{
- /** NEON vector type. */
+ /** Neon vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a lower-upper bounded RELU activation object.
/** 128-bit vector tag */
struct vector_128_tag {};
-/** Create the appropriate NEON vector given its type and size in terms of elements */
+/** Create the appropriate Neon vector given its type and size in terms of elements */
template <typename T, int S> struct neon_vector;
// Specializations
W128, /**< 128-bit width */
};
-/** Create the appropriate NEON vector given its type and size in terms of bits */
+/** Create the appropriate Neon vector given its type and size in terms of bits */
template <typename T, BitWidth BW> struct neon_bitvector;
// Specializations
#ifndef DOXYGEN_SKIP_THIS
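A minimal re-creation of the mechanism these traits implement, a broadcast chosen via an empty tag type (the names mirror the idea, not the library's exact definitions; AArch64 toolchain assumed):
```
#include <arm_neon.h>

struct vector_64_tag {};
struct vector_128_tag {};

// Map (element type, lane count) to a concrete vector type plus a tag.
template <typename T, int S> struct neon_vector_sketch;
template <> struct neon_vector_sketch<float, 2> { using type = float32x2_t; using tag_type = vector_64_tag;  };
template <> struct neon_vector_sketch<float, 4> { using type = float32x4_t; using tag_type = vector_128_tag; };

// Tag-dispatched broadcast, in the spirit of wrapper::vdup_n.
inline float32x2_t vdup_n_sketch(float v, vector_64_tag)  { return vdup_n_f32(v);  }
inline float32x4_t vdup_n_sketch(float v, vector_128_tag) { return vdupq_n_f32(v); }

// Usage: select the 128-bit float vector and broadcast a scalar into it.
inline float32x4_t broadcast4(float v)
{
    using traits = neon_vector_sketch<float, 4>;
    return vdup_n_sketch(v, typename traits::tag_type{});
}
```
The tag costs nothing at run time; it only steers overload resolution, which is how kernels such as range_function stay type-generic.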
Status validate_arguments(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Status validate_arguments(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX));
Status validate_arguments(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) + width_offset > dst->dimension(0));
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use NEON FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use Neon FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
void fp16_neon_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<float16_t, wrapper::traits::BitWidth::W128>;
const ActivationLayerInfo::ActivationFunction act = act_info.activation();
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
\ No newline at end of file
+#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
void fp32_neon_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename arm_compute::wrapper::traits::neon_bitvector_tag_t<float, wrapper::traits::BitWidth::W128>;
constexpr int window_step_x = 4;
template <typename ScalarType>
void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<ScalarType, wrapper::traits::BitWidth::W128>;
// Create input windows
}
} // namespace cpu
} // namespace arm_compute
-#endif // SRC_CORE_NEON_KERNELS_ADD_LIST_H
\ No newline at end of file
+#endif // SRC_CORE_NEON_KERNELS_ADD_LIST_H
Iterator in(src, window_src);
Iterator out(dst0, window);
- /** NEON vector types */
+ /** Neon vector types */
using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
using q8x8x2_t = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type;
Iterator in(src, window_src);
Iterator out(dst0, window);
- /** NEON vector types */
+ /** Neon vector types */
using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
using q8x8x2_t = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type;
Iterator in(src, window_src);
Iterator out(dst0, window);
- /** NEON vector types */
+ /** Neon vector types */
using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
using q16_t = typename wrapper::traits::promote_t<T>;
using q16x8_t = typename wrapper::traits::neon_vector<q16_t, 8>::type;
} // namespace cpu
} // namespace arm_compute
-#endif // SRC_CORE_NEON_KERNELS_QUANTIZED_H
\ No newline at end of file
+#endif // SRC_CORE_NEON_KERNELS_QUANTIZED_H
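The promote_t alias above exists so that 8-bit lanes can be accumulated without overflow; a minimal sketch of the widening step (helper name illustrative, Neon assumed):
```
#include <arm_neon.h>

// Widen 8 x u8 to 8 x u16 before adding, so e.g. an average pool over a
// window of QASYMM8 values cannot wrap around.
inline uint16x8_t accumulate_widened(uint16x8_t acc, uint8x8_t v)
{
    return vaddq_u16(acc, vmovl_u8(v));
}
```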
template <typename T>
void neon_logits_1d_max(const ITensor *in, ITensor *out, const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
constexpr int window_step_x = 16 / sizeof(T);
Iterator max_it(max, window);
Iterator out_it(out, window);
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
constexpr int vec_size = 16 / sizeof(T);
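A sketch of that 1D max pass (16 bytes per step is four float lanes): the running maximum that keeps the subsequent exponentials in softmax numerically safe. AArch64 assumed for the horizontal fold; the function name is illustrative.
```
#include <arm_neon.h>
#include <cmath>
#include <cstddef>

float row_max(const float *x, std::size_t n)
{
    float32x4_t m = vdupq_n_f32(-INFINITY);
    std::size_t i = 0;
    for (; i + 4 <= n; i += 4)
    {
        m = vmaxq_f32(m, vld1q_f32(x + i)); // lane-wise running max
    }
    float best = vmaxvq_f32(m); // fold the 4 lanes (AArch64 only)
    for (; i < n; ++i)
    {
        best = x[i] > best ? x[i] : best; // scalar tail
    }
    return best;
}
```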
template <typename T>
void sub_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
{
- /** NEON vector tag type. */
+ /** Neon vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
bool is_sat = policy == ConvertPolicy::SATURATE;
}
} // namespace cpu
} // namespace arm_compute
-#endif // SRC_CORE_NEON_KERNELS_SUB_LIST_H
\ No newline at end of file
+#endif // SRC_CORE_NEON_KERNELS_SUB_LIST_H
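The is_sat flag in sub_same_neon above selects between two intrinsic families; a one-line sketch of the distinction for the u8 case:
```
#include <arm_neon.h>

// WRAP is modular subtraction (vsub); SATURATE clamps at the bounds of
// the type (vqsub), so 3 - 5 gives 254 in the first case and 0 in the second.
inline uint8x16_t sub_u8(uint8x16_t a, uint8x16_t b, bool is_sat)
{
    return is_sat ? vqsubq_u8(a, b) : vsubq_u8(a, b);
}
```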
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
namespace backends
{
-/** Register NEON backend */
+/** Register Neon backend */
static detail::BackendRegistrar<NEDeviceBackend> NEDeviceBackend_registrar(Target::NEON);
NEDeviceBackend::NEDeviceBackend()
std::unique_ptr<arm_compute::IFunction> NEDeviceBackend::configure_node(INode &node, GraphContext &ctx)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Configuring NEON node with ID : " << node.id() << std::endl);
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Configuring Neon node with ID : " << node.id() << std::endl);
ARM_COMPUTE_ERROR_ON(node.assigned_target() != Target::NEON);
// Configure node
arm_compute::Status NEDeviceBackend::validate_node(INode &node)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating NEON node with ID : " << node.id() << std::endl);
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating Neon node with ID : " << node.id() << std::endl);
ARM_COMPUTE_ERROR_ON(node.assigned_target() != Target::NEON);
return NENodeValidator::validate(&node);
Target NETargetInfo::TargetType = Target::NEON;
-/** Collection of NEON convolution functions */
+/** Collection of Neon convolution functions */
struct NEConvolutionLayerFunctions
{
using GenericConvolutionLayer = NEConvolutionLayer;
using WinogradConvolutionLayer = NEWinogradConvolutionLayer;
};
-/** Collection of NEON element-wise functions */
+/** Collection of Neon element-wise functions */
struct NEEltwiseFunctions
{
using Addition = NEArithmeticAddition;
using Maximum = NEElementwiseMax;
};
-/** Collection of NEON unary element-wise functions */
+/** Collection of Neon unary element-wise functions */
struct NEUnaryEltwiseFunctions
{
using Exp = NEExpLayer;
};
-/** Function and tensor types to be used inside a NEON fused convolution/batch normalization layer */
+/** Function and tensor types to be used inside a Neon fused convolution/batch normalization layer */
struct NEFusedLayerTypes
{
using ConvolutionLayer = NEConvolutionLayer;
{
namespace backends
{
-/** Collection of NEON element-wise functions */
+/** Collection of Neon element-wise functions */
struct NEEltwiseLayerFunctions
{
using ArithmeticAddition = NEArithmeticAddition;
using ElementwiseMax = NEElementwiseMax;
};
-/** Collection of NEON unary element-wise functions */
+/** Collection of Neon unary element-wise functions */
struct NEUnaryEltwiseLayerFunctions
{
using ExpLayer = NEExpLayer;
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on NEON");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on Neon");
const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups);
switch(NEConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info, enable_fast_math))
/** Assembly Gemm kernel */
std::shared_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> _gemm_kernel_asm{ nullptr };
- /** Optimised NEON kernel */
+ /** Optimised Neon kernel */
std::unique_ptr<INEKernel> _optimised_kernel{ nullptr };
/** Input A */
const ITensor *_a
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::BFLOAT16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.num_groups > 1, "Grouping (num_groups != 1) is not supported on NEON");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.num_groups > 1, "Grouping (num_groups != 1) is not supported on Neon");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() != DataLayout::NHWC, "Data layout supported is NHWC");
const DataType data_type = input->data_type();
const TensorShape i_shape = input->tensor_shape();
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::BFLOAT16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on NEON");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on Neon");
const DataLayout data_layout = input->data_layout();
const DataType data_type = input->data_type();
if(!skip_im2col)
{
// Create tensor info for im2col reshaped inputs
- // For NEON the batch size is on the fourth dimension
+ // For Neon the batch size is on the fourth dimension
// TODO (giaiod01): Auto-initialize the output shape of im2col COMPMID-1482
TensorShape shape_im2col = input->tensor_shape();
shape_im2col.set(0, mat_weights_rows);
{
// Forward Declarations
class CpuPoolingAssemblyDispatch;
-/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels:
+/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if padding size is different from zero)
* -# @ref kernels::CpuPoolingKernel
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
datasets::BorderModes()),
framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })));
TEST_SUITE_END() // Scale
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace benchmark
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
namespace test
{
-/** Fixture that can be used for NEON, CL and OpenGL ES */
+/** Fixture that can be used for Neon, CL and OpenGL ES */
template <typename TensorType, typename Function, typename Accessor, typename T>
class ScaleLayerFixture : public framework::Fixture
{
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
}),
framework::dataset::make("AlignCorners", { true }));
-/** Generated shapes: Used by NEON precommit and nightly
+/** Generated shapes: Used by Neon precommit and nightly
* - 2D shapes with 0, 1, 2 vector iterations
* - 3D shapes with 0, 1 vector iterations
* - 4D shapes with 0 vector iterations
} // namespace datasets
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET */
\ No newline at end of file
+#endif /* ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET */
files = [f for f in files if "OpenCL" not in os.path.basename(str(f))]
if not framework_env['mali']:
- # Remove MALI files
+ # Remove Mali files
files = [f for f in files if "MaliCounter" not in os.path.basename(str(f))]
else:
framework_env.Append(CPPDEFINES = ['MALI_ENABLED'])
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
DataType::F16,
});
-/** Aligned corners, this functionality is supported only by NEON and OpenCL backends */
+/** Aligned corners, this functionality is supported only by Neon and OpenCL backends */
const auto AlignCorners = framework::dataset::make("AlignCorners",
{
false,
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // S16
TEST_SUITE_END() // AbsoluteDifference
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // U8
TEST_SUITE_END() // AccumulateSquared
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // ActivationLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
}
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // ArgMinMax
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // ArithmeticAddition
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // ArithmeticSubtraction
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // BatchNormalizationLayerFusion
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END()
TEST_SUITE_END() // BatchToSpace
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
validate(Accessor(_target), _reference);
}
TEST_SUITE_END() // BitwiseAnd
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
validate(Accessor(_target), _reference);
}
TEST_SUITE_END() // BitwiseNot
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
validate(Accessor(_target), _reference);
}
TEST_SUITE_END() // BitwiseOr
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
validate(Accessor(_target), _reference);
}
TEST_SUITE_END() // BitwiseXor
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // BBoxTransform
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, NECastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance)
TEST_SUITE_END() // Cast
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // YUVPlanar
TEST_SUITE_END() // ChannelCombine
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // YUVPlanar
TEST_SUITE_END() // ChannelExtract
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // ChannelShuffle
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // NVtoYUV
TEST_SUITE_END() // ColorConvert
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // QASYMM8
TEST_SUITE_END() // ConvertFullyConnectedWeights
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
/** Tolerance value for comparing reference's output against implementation
*
- * This is due to the fact that NEON target performs multiplication with reciprocal of scale,
+ * This is due to the fact that Neon target performs multiplication with reciprocal of scale,
* while reference performs direct division with scale.
*/
constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
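A small probe of the effect the comment describes, under assumed example values: quantizing with x / scale versus x * (1 / scale) can round to adjacent integers near a rounding boundary, which is exactly what the 1-ULP tolerance absorbs.
```
#include <cmath>
#include <cstdio>

int main()
{
    const float scale     = 0.003921569f; // assumed example scale (~1/255)
    const float inv_scale = 1.0f / scale; // what the Neon path effectively uses
    for (int i = 0; i < 100000; ++i)
    {
        const float x     = 1e-5f * static_cast<float>(i);
        const long  q_div = std::lround(x / scale);     // reference behaviour
        const long  q_mul = std::lround(x * inv_scale); // Neon-like behaviour
        if (q_div != q_mul)
        {
            std::printf("x=%.6f: div=%ld mul=%ld\n", x, q_div, q_mul);
        }
    }
    return 0;
}
```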
TEST_SUITE_END() // Separable9x9
TEST_SUITE_END() // CustomConvolution
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // DirectGEMMConv2d
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // FixedSeed
TEST_SUITE_END() // Copy
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // S32
TEST_SUITE_END() // CropResize
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // DeconvolutionLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // S32_to_U8
TEST_SUITE_END() // DepthConvertLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END()
TEST_SUITE_END() // DepthToSpace
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // DepthwiseConvLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
TEST_SUITE_END() // DepthwiseConvolutionLayerNative
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // FP32
TEST_SUITE_END() // DequantizationLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
}
TEST_SUITE_END() // Derivative
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
quantize_and_fill_tensor(Accessor(anchors), anchors_vector);
}
- // Determine the output through the NEON kernel
+ // Determine the output through the Neon kernel
Tensor output_boxes;
Tensor output_classes;
Tensor output_scores;
TEST_SUITE_END() // QASYMM8
TEST_SUITE_END() // DetectionPostProcessLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
}
TEST_SUITE_END() // Dilate
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // GEMMDilatedConvolutionLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
TEST_SUITE_END() // DirectConvolutionLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Integer
TEST_SUITE_END() // AbsLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace
{
RelativeTolerance<float> tolerance_fp32(0.000001f);
-AbsoluteTolerance<int> tolerance_zero_s32(1); // Tolerance for S32 division
+AbsoluteTolerance<int> tolerance_zero_s32(1); // Tolerance for S32 division
/** Input data sets **/
const auto ElementwiseDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32),
TEST_SUITE_END() // Integer
TEST_SUITE_END() // ElementwiseDivision
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // ExpLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
TEST_SUITE_END() // LogLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // ElementwiseMax
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // ElementwiseMin
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Integer
TEST_SUITE_END() // NegLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // ElementwisePower
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
TEST_SUITE_END() // RoundLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // RsqrtLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // SinLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // ElementwiseSquaredDiff
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // FFTConvolutionLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
}
TEST_SUITE_END() // U32
-
TEST_SUITE(S32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEFillFixture<int32_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::S32)))
{
TEST_SUITE_END() // F32
TEST_SUITE_END() // Fill
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // FlattenLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // DepthwiseConvolution
TEST_SUITE_END() // FuseBatchNormalization
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // QuantizeDownInt32ToInt16ScaleByFixedPoint
TEST_SUITE_END() // OutputStage
TEST_SUITE_END() // GEMMLowp
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // U16
TEST_SUITE_END() // Gather
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // GenerateProposals
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace
{
/* Set the tolerance (percentage) used when validating the score of the detection window.
- Note: High tolerance is required due to divergence between CL and NEON detection window scores. */
+ Note: High tolerance is required due to divergence between CL and Neon detection window scores. */
RelativeTolerance<float> tolerance(1.0f);
/* Input dataset (values must be a multiple of the HOGInfo block_size) */
}
TEST_SUITE_END() // Special Cases
TEST_SUITE_END() // Im2Col
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE_END() // InstanceNormalizationLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE_END() // L2NormalizeLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE_END() // LSTMLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
// *INDENT-ON*
TEST_SUITE_END() // LSTMLayerQuantized
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
validate(Accessor(_target), _reference);
}
TEST_SUITE_END() // LogicalNot
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE_END() // Float
TEST_SUITE_END() // PoolingLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // MeanStdNormalizationLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // F32
TEST_SUITE_END() // MinMaxLocation
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // NormalizationLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // PReluLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // PadLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // PoolingLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // FP32
TEST_SUITE_END() // PriorBoxLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE(QSYMM16)
/** Tests will be targeting
- * - Comparison between NEON kernel and the exact same but scalar version of reference kernel
+ * - Comparison between the Neon kernel and the exact same, but scalar, version of the reference kernel
 * - Input shapes of 1D and 2D whose first dimension covers the boundary values of the 128-bit vector size (0~3 iterations)
 * - Weight and bias 1D shapes that have the same size as the input shapes
 * - Quantization scales both greater than and smaller than one.
 * - The algorithm is sensitive to the quantization scale, but it is hard to fully test
 *   this sensitivity for the aforementioned reason.
* - Again, it is hard to fully test corner values due to the exact same algorithm of the
- * reference kernel and the NEON kernel.
+ * reference kernel and the Neon kernel.
*/
constexpr uint32_t qsymm16_per_vector = vector_size_byte / sizeof(int16_t);
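The lane count used above follows directly from the register width; a self-contained sketch of the arithmetic (assuming the usual 128-bit Neon register, i.e. `vector_size_byte == 16`):

```cpp
#include <cstdint>

constexpr uint32_t vector_size_byte   = 16;                                 // 128-bit Neon register
constexpr uint32_t qsymm16_per_vector = vector_size_byte / sizeof(int16_t); // 16 / 2 = 8 lanes
static_assert(qsymm16_per_vector == 8, "a 128-bit vector holds 8 QSYMM16 values");
```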
TEST_SUITE_END() // QSYMM16
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // QLSTMLayerNormalization
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // QuantizationLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
TEST_SUITE_END() // FP16
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
TEST_SUITE_END() // RNNLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // RoiAlign
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // Range
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // ReduceMean
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // ReductionOperation
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // S8
TEST_SUITE_END() // ReorgLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // Reverse
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // Scale
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // Select
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // Slice
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // QASYMM8
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // SpaceToBatch
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // SpaceToDepthLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // Split
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // S8
TEST_SUITE_END() // Shapes4D
TEST_SUITE_END() // StackLayer
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Float
TEST_SUITE_END() // StridedSlice
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // Tile
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
}
TEST_SUITE_END() // DynamicTensor
TEST_SUITE_END() // UNIT
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // RuntimeContext
TEST_SUITE_END() // UNIT
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
TEST_SUITE_END() // QASYMM8
TEST_SUITE_END() // Unstack
-TEST_SUITE_END() // NEON
+TEST_SUITE_END() // Neon
} // namespace validation
} // namespace test
} // namespace arm_compute
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
}
else
{
- // When converting S32 to F16, both reference and NEON implementations are + or - infinity outside the F16 range.
- if(dt_in==DataType::S32 && dt_out==DataType::F16)
+ // When converting S32 to F16, both reference and Neon implementations are + or - infinity outside the F16 range.
+ if(dt_in == DataType::S32 && dt_out == DataType::F16)
{
std::uniform_int_distribution<int32_t> distribution_s32(-65504, 65504);
library->fill(tensor, distribution_s32, i);
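The ±65504 bounds are not arbitrary: 65504 = (2 - 2^-10) * 2^15 is the largest finite IEEE 754 half-precision value, so any S32 input beyond it saturates to infinity in F16. A quick, self-contained check of that arithmetic:

```cpp
#include <cassert>

int main()
{
    // Largest finite binary16 value: maximum significand at the top finite exponent.
    const double f16_max = (2.0 - 1.0 / 1024.0) * 32768.0; // (2 - 2^-10) * 2^15
    assert(f16_max == 65504.0); // matches the bounds used for distribution_s32 above
    return 0;
}
```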
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
return (x >> exponent) + ((x & mask) > threshold ? 1 : 0);
}
-/** Multiplication of two integers. The same as ARMv7 NEON VQRDMULH instruction. */
+/** Multiplication of two integers. The same as the Armv7 Neon VQRDMULH instruction. */
inline int32_t asymm_int_mult(int32_t a, int32_t b)
{
bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
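For context, the complete operation is the classic saturating rounding doubling high multiply; a self-contained sketch in the style of the well-known gemmlowp formulation (illustrative, not necessarily the library's exact code):

```cpp
#include <cstdint>
#include <limits>

inline int32_t saturating_rounding_doubling_high_mul(int32_t a, int32_t b)
{
    // Only INT32_MIN * INT32_MIN overflows the doubled high half: saturate it.
    const bool    overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
    const int64_t ab       = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    // Round to nearest before taking the high 32 bits of 2*a*b.
    const int32_t nudge    = ab >= 0 ? (1 << 30) : (1 - (1 << 30));
    const int32_t high     = static_cast<int32_t>((ab + nudge) / (int64_t{1} << 31));
    return overflow ? std::numeric_limits<int32_t>::max() : high;
}
```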
*
* --help : Print the example's help message.
* --threads : The number of threads to be used by the example during execution.
- * --target : Execution target to be used by the examples. Supported target options: NEON, CL, GC.
+ * --target : Execution target to be used by the examples. Supported target options: Neon, CL, GC.
* --type : Data type to be used by the examples. Supported data type options: QASYMM8, F16, F32.
* --layout : Data layout to be used by the examples. Supported data layout options : NCHW, NHWC.
* --enable-tuner : Toggle option to enable the OpenCL dynamic tuner.
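For instance, an example might be launched as `./graph_mobilenet --target=Neon --type=F32 --layout=NHWC --threads=4` (an illustrative invocation; binary names and the exact accepted option values vary between examples and releases).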
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/** Utility function to return the TargetHint
*
- * @param[in] target Integer value which expresses the selected target. Must be 0 for NEON or 1 for OpenCL or 2 (OpenCL with Tuner)
+ * @param[in] target Integer value which expresses the selected target. Must be 0 for Neon, 1 for OpenCL, or 2 for OpenCL with the Tuner
*
* @return the TargetHint
*/
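A minimal sketch of what such a mapping might look like (the helper name is hypothetical, and any tuner configuration implied by the value 2 would be handled elsewhere; this is an illustration, not the library's actual utility):

```cpp
#include "arm_compute/graph/Types.h"

// Hypothetical mapping: 0 -> Neon, 1 -> OpenCL, 2 -> OpenCL (tuner enabled
// separately). Any other value falls back to the Neon backend here.
inline arm_compute::graph::Target target_hint_from_int(int target)
{
    return (target == 1 || target == 2) ? arm_compute::graph::Target::CL
                                        : arm_compute::graph::Target::NEON;
}
```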