#ifndef ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H
#define ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
#ifndef ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H
#define ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H
+#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "support/Mutex.h"
/** CPP kernel to compute corner candidates
*/
-class CPPCornerCandidatesKernel : public INEKernel
+class CPPCornerCandidatesKernel : public ICPPKernel
{
public:
const char *name() const override
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H
#define ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H
+#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/IArray.h"
#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
CPPDetectionWindowNonMaximaSuppressionKernel(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default;
/** Allow instances of this class to be moved */
CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default;
+ /** Default destructor */
+ ~CPPDetectionWindowNonMaximaSuppressionKernel() = default;
/** Initialise the kernel's input, output and the euclidean minimum distance
*
 * @attention: If @ref IDetectionWindowArray is passed to the kernel, the map() and unmap() methods of @ref IDetectionWindowArray must be called respectively before and after
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_INEKERNEL_H
-#define ARM_COMPUTE_INEKERNEL_H
-
-#include "arm_compute/core/CPP/ICPPKernel.h"
-
-namespace arm_compute
-{
-/** Common interface for all kernels implemented in NEON. */
-using INEKernel = ICPPKernel;
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_INEKERNEL_H */
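The header deleted above contained nothing but the `using INEKernel = ICPPKernel;` alias, so code that still derives from `INEKernel` only needs an include and base-class swap. A minimal sketch of that migration, with a hypothetical `MyKernel` that is not part of the library:

```cpp
// Before (header now removed):
//   #include "arm_compute/core/NEON/INEKernel.h"
//   class MyKernel : public arm_compute::INEKernel { /* ... */ };

// After: include the CPP interface directly and derive from ICPPKernel,
// which is exactly what the INEKernel alias resolved to.
#include "arm_compute/core/CPP/ICPPKernel.h"

class MyKernel : public arm_compute::ICPPKernel
{
public:
    const char *name() const override
    {
        return "MyKernel";
    }
    void run(const arm_compute::Window &window, const arm_compute::ThreadInfo &info) override;
};
```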
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_INESIMPLEKERNEL_H
-#define ARM_COMPUTE_INESIMPLEKERNEL_H
-
-#include "arm_compute/core/CPP/ICPPSimpleKernel.h"
-
-namespace arm_compute
-{
-/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */
-using INESimpleKernel = ICPPSimpleKernel;
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEKERNELS_H
-#define ARM_COMPUTE_NEKERNELS_H
-
-/* Header regrouping all the NEON kernels */
-#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
-#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h"
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
-#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h"
-#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h"
-#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
-#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/core/NEON/kernels/NECropKernel.h"
-#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h"
-#include "arm_compute/core/NEON/kernels/NEErodeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFloorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGatherKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
-#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
-#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h"
-#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h"
-#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
-#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NERangeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
-#include "arm_compute/core/NEON/kernels/NERemapKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReverseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NESelectKernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h"
-#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
-#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
-#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h"
-#include "arm_compute/core/NEON/kernels/NETileKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
-
-#endif /* ARM_COMPUTE_NEKERNELS_H */
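With the umbrella header gone, the kernel classes listed above stop being part of the public include tree; application code is expected to keep using the runtime functions, which remain public. A rough usage sketch under that assumption (shapes, data type and the ReLU choice are arbitrary):

```cpp
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

int main()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

    // Configure through the runtime function instead of reaching for the kernel directly.
    NEActivationLayer act;
    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    act.run();
    return 0;
}
```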
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H
-#define ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the absolute difference kernel
- *
- * Absolute difference is computed by:
- * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
- */
-class NEAbsoluteDifferenceKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEAbsoluteDifferenceKernel";
- }
- /** Default constructor */
- NEAbsoluteDifferenceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default;
- /** Default destructor */
- ~NEAbsoluteDifferenceKernel() = default;
-
- /** Set the inputs and output tensors
- *
- * @param[in] input1 Source tensor. Data types supported: U8/S16
- * @param[in] input2 Source tensor. Data types supported: U8/S16
- * @param[out] output Destination tensor, Data types supported: U8/S16
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised absolute difference functions
- *
- * @param[in] input1 An input tensor. Data types supported: U8/S16.
- * @param[in] input2 An input tensor. Data types supported: U8/S16.
- * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16.
- * @param[in] window Region on which to execute the kernel.
- */
- using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
-
- /** Absolute difference function to use for the particular tensor formats passed to configure() */
- AbsDiffFunction *_func;
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H */
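A plain scalar reference for the formula documented above; this only illustrates the arithmetic, whereas the removed kernel implemented it with NEON vector code and per-format specialisations:

```cpp
#include <cstdint>
#include <cstdlib>

// output(x,y) = | input1(x,y) - input2(x,y) |, one element at a time.
static inline uint8_t absdiff_u8(uint8_t a, uint8_t b)
{
    return static_cast<uint8_t>(a > b ? a - b : b - a);
}

static inline int16_t absdiff_s16(int16_t a, int16_t b)
{
    // Widen before subtracting to avoid overflow, then clamp to the S16 output range.
    const int32_t d = std::abs(static_cast<int32_t>(a) - static_cast<int32_t>(b));
    return static_cast<int16_t>(d > INT16_MAX ? INT16_MAX : d);
}
```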
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEACCUMULATEKERNEL_H
-#define ARM_COMPUTE_NEACCUMULATEKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the accumulate kernel
- *
- * Accumulation is computed by:
- * @f[ accum(x,y) = accum(x,y) + input(x,y) @f]
- */
-class NEAccumulateKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEAccumulateKernel";
- }
- /** Set the input and accumulation tensors
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] accum Destination tensor. Data type supported: S16.
- */
- void configure(const ITensor *input, ITensor *accum);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-
-/** Interface for the accumulate weighted kernel
- *
- * Weighted accumulation is computed:
- * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f]
- *
- * Where @f$ 0 \le \alpha \le 1 @f$
- * Conceptually, the rounding for this is defined as:
- * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f]
-*/
-class NEAccumulateWeightedKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEAccumulateWeightedKernel";
- }
- /** Default constructor */
- NEAccumulateWeightedKernel();
- /** Set the input and accumulation tensors, and the scale value
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] alpha Scalar value in the range [0.0f, 1.0f]
- * @param[in,out] accum Accumulated tensor. Data type supported: U8.
- */
- void configure(const ITensor *input, float alpha, ITensor *accum);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-protected:
- float _alpha;
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** Interface for the accumulate weighted kernel using F16 */
-class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel
-{
-public:
- const char *name() const override
- {
- return "NEAccumulateWeightedFP16Kernel";
- }
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-/** Interface for the accumulate weighted kernel using F16 */
-using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-/** Interface for the accumulate squared kernel
- *
- * The accumulation of squares is computed:
- * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f]
- *
- * Where @f$ 0 \le shift \le 15 @f$
-*/
-class NEAccumulateSquaredKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEAccumulateSquaredKernel";
- }
- /** Default constructor */
- NEAccumulateSquaredKernel();
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] shift Shift value in the range of [0, 15]
- * @param[in,out] accum Accumulated tensor. Data type supported: S16.
- */
- void configure(const ITensor *input, uint32_t shift, ITensor *accum);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- uint32_t _shift;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEACCUMULATEKERNEL_H */
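A scalar sketch of the two accumulation formulas documented above, following the conceptual rounding given in the comments rather than the kernel's exact NEON arithmetic:

```cpp
#include <cstdint>

// accum(x,y) = (1 - alpha) * accum(x,y) + alpha * input(x,y), with 0 <= alpha <= 1.
static inline uint8_t accumulate_weighted(uint8_t accum, uint8_t input, float alpha)
{
    const float result = (1.0f - alpha) * static_cast<float>(accum) + alpha * static_cast<float>(input);
    return static_cast<uint8_t>(result);
}

// accum(x,y) = saturate_int16( accum(x,y) + ((input(x,y)^2) >> shift) ), with 0 <= shift <= 15.
static inline int16_t accumulate_squared(int16_t accum, uint8_t input, uint32_t shift)
{
    const uint16_t squared = static_cast<uint16_t>((static_cast<uint32_t>(input) * input) >> shift);
    const int32_t  sum     = static_cast<int32_t>(accum) + static_cast<int32_t>(squared);
    return static_cast<int16_t>(sum > INT16_MAX ? INT16_MAX : sum);
}
```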
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_fp16.h>
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the activation layer kernel. */
-class NEActivationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEActivationLayerKernel";
- }
- /** Constructor */
- NEActivationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernel(const NEActivationLayerKernel &) = delete;
- /** Default move constructor */
- NEActivationLayerKernel(NEActivationLayerKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete;
- /** Default move assignment operator */
- NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[out] output Destination tensor info. Data type supported: same as @p input
- * @param[in] activation_info Activation layer information.
- */
- void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- ActivationLayerInfo _act_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */
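The kernel above already exposed its checks through the `validate()` / `configure()` pair on `ITensorInfo`; once the header is withdrawn, the same validation is reachable through the public runtime function. A small sketch assuming that API (the helper name is made up):

```cpp
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"

// Returns true if a ReLU activation on these tensor infos would configure cleanly.
bool relu_config_is_valid(const arm_compute::ITensorInfo *src, const arm_compute::ITensorInfo *dst)
{
    const arm_compute::ActivationLayerInfo act(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
    const arm_compute::Status status = arm_compute::NEActivationLayer::validate(src, dst, act);
    return status.error_code() == arm_compute::ErrorCode::OK;
}
```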
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H
-#define ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform addition between two tensors */
-class NEArithmeticAdditionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEArithmeticAdditionKernel";
- }
- /** Default constructor */
- NEArithmeticAdditionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default;
- /** Default destructor */
- ~NEArithmeticAdditionKernel() = default;
-
- /** Initialise the kernel's input, output and border mode.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- *
- * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
- * @param[in] policy Overflow policy.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel
- *
- * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
- * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
- * @param[in] policy Overflow policy.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised add functions
- *
- * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
- * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32.
- * @param[in] policy Overflow policy.
- * @param[in] window Region on which to execute the kernel.
- */
- using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window);
- /** Add function to use for the particular tensor types passed to configure() */
- AddFunction *_func;
- ConvertPolicy _policy;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */
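For the (U8,U8) -> U8 configuration listed above, the `ConvertPolicy` argument decides how the element-wise sum is narrowed back to 8 bits; a scalar illustration of the two policies (the kernel applies the same choice across the whole window):

```cpp
#include <cstdint>

static inline uint8_t add_u8(uint8_t a, uint8_t b, bool saturate)
{
    const int32_t sum = static_cast<int32_t>(a) + static_cast<int32_t>(b);
    if (saturate)
    {
        // ConvertPolicy::SATURATE clamps to the representable range.
        return static_cast<uint8_t>(sum > UINT8_MAX ? UINT8_MAX : sum);
    }
    // ConvertPolicy::WRAP simply keeps the low 8 bits.
    return static_cast<uint8_t>(sum);
}
```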
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H
-#define ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform subtraction between two tensors */
-class NEArithmeticSubtractionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEArithmeticSubtractionKernel";
- }
- /** Default constructor */
- NEArithmeticSubtractionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default;
- /** Default destructor */
- ~NEArithmeticSubtractionKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (QASYMM8, QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- *
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32.
- * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (QASYMM8, QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- *
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
- * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32.
- * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised sub functions
- *
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32.
- * @param[in] window Region on which to execute the kernel.
- * @param[in] is_sat Flag to indicate if the policy is SATURATE.
- */
- using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat);
- /** Sub function to use for the particular tensor types passed to configure() */
- SubFunction *_func;
- ConvertPolicy _policy;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the batch concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEBatchConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBatchConcatenateLayerKernel";
- }
- /** Default constructor */
- NEBatchConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEBatchConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window);
-
-private:
- BatchConcatFunction *_func;
- unsigned int _batch_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the batch normalization layer kernel.
- */
-class NEBatchNormalizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBatchNormalizationLayerKernel";
- }
- /** Default constructor */
- NEBatchNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~NEBatchNormalizationLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- */
- void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f,
- ActivationLayerInfo act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
- float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Configure execution function in case of non-fused activation **/
- void configure_non_fused();
- /** Configure execution function in case of fused activation **/
- void configure_fused();
-
- /** Template function to run batch normalization on fp32
- *
- * @tparam T Specialization data type
- * @tparam fused_activation Boolean that flags if it's a fused activation or not
- * @tparam F Activation function functor to run
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T, bool fused_activation, typename F>
- void batch_normalization_nchw(const Window &window);
- /** Template function to run batch normalization on fp32 on tensors with NHWC format
- *
- * @tparam T Specialization data type
- * @tparam fused_activation Boolean that flags if it's a fused activation or not
- * @tparam F Activation function functor to run
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T, bool fused_activation, typename F>
- void batch_normalization_nhwc(const Window &window);
- /** Common signature for all the batch normalization functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using BatchNormFunctionPtr = void (NEBatchNormalizationLayerKernel::*)(const Window &window);
-
-private:
- BatchNormFunctionPtr _func;
- ITensor *_input;
- ITensor *_output;
- const ITensor *_mean;
- const ITensor *_var;
- const ITensor *_gamma;
- const ITensor *_beta;
- float _epsilon;
- ActivationLayerInfo _act_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H */
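For reference, the parameters wired through `configure()` above combine in the standard batch-normalization transform, written here in the same @f[ ... @f] notation used by the other kernel comments, with the per-feature-map mean and variance taken from the mean and var tensors:

@f[ output(x,y,c) = \gamma_c \cdot \frac{input(x,y,c) - \mu_c}{\sqrt{\sigma^2_c + \epsilon}} + \beta_c @f]

where beta defaults to 0 and gamma to 1 when the optional tensors are not provided, as stated in the parameter descriptions.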
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H
-#define ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the batch to space kernel */
-class NEBatchToSpaceLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBatchToSpaceLayerKernel";
- }
- /** Default constructor */
- NEBatchToSpaceLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchToSpaceLayerKernel(const NEBatchToSpaceLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBatchToSpaceLayerKernel &operator=(const NEBatchToSpaceLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBatchToSpaceLayerKernel(NEBatchToSpaceLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBatchToSpaceLayerKernel &operator=(NEBatchToSpaceLayerKernel &&) = default;
- /** Default destructor */
- ~NEBatchToSpaceLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ITensor *block_shape, ITensor *output);
- /** Initialise the kernel's inputs and output (Static block shape).
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape).
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- const ITensor *_block_shape; /**< Block shape tensor */
- ITensor *_output; /**< Destination tensor */
- DataLayout _data_layout; /**< Data layout to be used at run-time */
-
- int32_t _block_shape_x;
- int32_t _block_shape_y;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H */
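As a shape-only illustration of the rearrangement described above (hypothetical helper; a [width, height, channels, batch] ordering is assumed): each spatial dimension is multiplied by its block size and the batch dimension is divided by their product, e.g. block sizes of 2x2 turn a [4, 4, 3, 8] input into an [8, 8, 3, 2] output.

```cpp
#include <array>
#include <cstdint>

// Output shape of a batch-to-space rearrangement, ignoring any cropping.
std::array<int32_t, 4> batch_to_space_shape(const std::array<int32_t, 4> &in_whcn,
                                            int32_t block_x, int32_t block_y)
{
    return { in_whcn[0] * block_x,               // width
             in_whcn[1] * block_y,               // height
             in_whcn[2],                         // channels
             in_whcn[3] / (block_x * block_y) }; // batch
}
```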
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBITWISEANDKERNEL_H
-#define ARM_COMPUTE_NEBITWISEANDKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f]
- */
-class NEBitwiseAndKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBitwiseAndKernel";
- }
- /** Default constructor */
- NEBitwiseAndKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input1 An input tensor. Data type supported: U8.
- * @param[in] input2 An input tensor. Data type supported: U8
- * @param[out] output Output tensor. Data type supported: U8.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input1; /**< Source tensor 1 */
- const ITensor *_input2; /**< Source tensor 2 */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBITWISEANDKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBITWISENOTKERNEL_H
-#define ARM_COMPUTE_NEBITWISENOTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform bitwise NOT operation
- *
- * Result is computed by:
- * @f[ output(x,y) = \lnot input(x,y) @f]
- */
-class NEBitwiseNotKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBitwiseNotKernel";
- }
- /** Default constructor */
- NEBitwiseNotKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default;
- /** Initialise the kernel's input and output
- *
- * @param[in] input An input tensor. Data type supported: U8.
- * @param[out] output The output tensor. Data type supported: U8.
- */
- void configure(const ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBITWISENOTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBITWISEORKERNEL_H
-#define ARM_COMPUTE_NEBITWISEORKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform bitwise inclusive OR between two tensors
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f]
- */
-class NEBitwiseOrKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBitwiseOrKernel";
- }
- /** Default constructor */
- NEBitwiseOrKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input1 An input tensor. Data type supported: U8.
- * @param[in] input2 An input tensor. Data type supported: U8
- * @param[out] output Output tensor. Data type supported: U8.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input1; /**< Source tensor 1 */
- const ITensor *_input2; /**< Source tensor 2 */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBITWISEORKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBITWISEXORKERNEL_H
-#define ARM_COMPUTE_NEBITWISEXORKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f]
- */
-class NEBitwiseXorKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBitwiseXorKernel";
- }
- /** Default constructor */
- NEBitwiseXorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default;
-    /** Initialise the kernel's inputs and output.
- *
- * @param[in] input1 An input tensor. Data type supported: U8.
- * @param[in] input2 An input tensor. Data type supported: U8
- * @param[out] output The output tensor. Data type supported: U8.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input1; /**< Source tensor 1 */
- const ITensor *_input2; /**< Source tensor 2 */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEBITWISEXORKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H
-#define ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the bounding box kernel */
-class NEBoundingBoxTransformKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEBoundingBoxTransformKernel";
- }
-
- /** Default constructor */
- NEBoundingBoxTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBoundingBoxTransformKernel(const NEBoundingBoxTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEBoundingBoxTransformKernel &operator=(const NEBoundingBoxTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEBoundingBoxTransformKernel(NEBoundingBoxTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEBoundingBoxTransformKernel &operator=(NEBoundingBoxTransformKernel &&) = default;
- /** Default destructor */
- ~NEBoundingBoxTransformKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
-     * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
-     *                        Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes.
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
- *
- */
- void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info);
-
-    /** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransformKernel
- *
- * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
-     * @param[in]  pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
-     *                        Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes.
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <typename T>
- void internal_run(const Window &window);
-
- const ITensor *_boxes;
- ITensor *_pred_boxes;
- const ITensor *_deltas;
- BoundingBoxTransformInfo _bbinfo;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H */
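The deltas above are applied with the usual R-CNN style box transform; a scalar sketch of one box/class pair under that assumption (hypothetical helper; the image scale, clipping and coordinate conventions carried by BoundingBoxTransformInfo are omitted):

#include <cmath>

// Applies [dx, dy, dw, dh] to one box [x1, y1, x2, y2] in pixel coordinates using the
// conventional centre/size parameterisation. The kernel additionally applies the image
// scale and clamps the result as described by BoundingBoxTransformInfo.
void transform_box(const float box[4], const float delta[4], float pred[4])
{
    const float w  = box[2] - box[0];
    const float h  = box[3] - box[1];
    const float cx = box[0] + 0.5f * w;
    const float cy = box[1] + 0.5f * h;

    const float pred_cx = cx + delta[0] * w;
    const float pred_cy = cy + delta[1] * h;
    const float pred_w  = std::exp(delta[2]) * w;
    const float pred_h  = std::exp(delta[3]) * h;

    pred[0] = pred_cx - 0.5f * pred_w;
    pred[1] = pred_cy - 0.5f * pred_h;
    pred[2] = pred_cx + 0.5f * pred_w;
    pred[3] = pred_cy + 0.5f * pred_h;
}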
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEBOX3x3KERNEL_H
-#define ARM_COMPUTE_NEBOX3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a Box 3x3 filter */
-class NEBox3x3Kernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEBox3x3Kernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** NEON kernel to perform a Box 3x3 filter for FP16 datatype
- */
-class NEBox3x3FP16Kernel : public NEBox3x3Kernel
-{
-public:
- const char *name() const override
- {
- return "NEBox3x3FP16Kernel";
- }
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */
-using NEBox3x3FP16Kernel = NEBox3x3Kernel;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEBOX3x3KERNEL_H */
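The filter itself is simply the mean of the 3x3 neighbourhood; a per-pixel scalar sketch (border handling, which configure() controls through border_undefined, is left out):

#include <cstdint>

// Scalar reference of a Box 3x3 filter on U8 data: the output is the average of the nine neighbours.
uint8_t box3x3_at(const uint8_t *src, int stride, int x, int y)
{
    int sum = 0;
    for(int dy = -1; dy <= 1; ++dy)
    {
        for(int dx = -1; dx <= 1; ++dx)
        {
            sum += src[(y + dy) * stride + (x + dx)];
        }
    }
    return static_cast<uint8_t>(sum / 9);
}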
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECANNYEDGEKERNEL_H
-#define ARM_COMPUTE_NECANNYEDGEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Computes magnitude and quantised phase from input gradients. */
-class NEGradientKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGradientKernel";
- }
- /** Default constructor */
- NEGradientKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGradientKernel(const NEGradientKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGradientKernel &operator=(const NEGradientKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGradientKernel(NEGradientKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGradientKernel &operator=(NEGradientKernel &&) = default;
- /** Default destructor */
- virtual ~NEGradientKernel() = default;
-
- /** Initialise the kernel's sources, destinations and border mode.
- *
-     * @note gx, gy and magnitude must all use the same element size (either 16-bit or 32-bit)
- *
- * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32.
- * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx.
- * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32).
- * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8.
- * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm
- */
- virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-protected:
- /** Common signature for all the specialised gradient functions
- *
- * @param[in] gx_ptr Pointer to the first input tensor.
- * @param[in] gy_ptr Pointer to the second input tensor.
- * @param[out] magnitude_ptr Pointer to the first output tensor
- * @param[out] phase_ptr Pointer to the second output tensor
- */
- using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr);
-
- GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */
- const ITensor *_gx; /**< Source tensor - Gx component */
- const ITensor *_gy; /**< Source tensor - Gy component */
- ITensor *_magnitude; /**< Destination tensor - Magnitude */
- ITensor *_phase; /**< Destination tensor - Quantized phase */
-};
-
-/** NEON kernel to perform Non-Maxima suppression for Canny Edge.
- *
- * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
- * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE.
- *
- * @note Hysteresis is computed in @ref NEEdgeTraceKernel
- */
-class NEEdgeNonMaxSuppressionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEEdgeNonMaxSuppressionKernel";
- }
- /** Default constructor */
- NEEdgeNonMaxSuppressionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default;
- /** Default destructor */
- ~NEEdgeNonMaxSuppressionKernel() = default;
-
- /** Initialise the kernel's sources, destination and border mode.
- *
- * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32.
- * @param[in] phase Source tensor - Quantized phase. Data type supported: U8.
- * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge"
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Common signature for all the specialised non-maxima suppression functions
- *
- * @param[in] magnitude_ptr Pointer to the first input tensor.
- * @param[in] phase_ptr Pointer to the second input tensor.
- * @param[out] output_ptr Pointer to the output tensor
- * @param[in] stride_mag Stride of the magnitude tensor
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- */
- using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr,
- const int32_t lower_thr);
-
- EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */
- const ITensor *_magnitude; /**< Source tensor - Magnitude */
- const ITensor *_phase; /**< Source tensor - Quantized phase */
- ITensor *_output; /**< Destination tensor */
- int32_t _lower_thr; /**< Lower threshold used for the hysteresis */
- int32_t _upper_thr; /**< Upper threshold used for the hysteresis */
-};
-
-/** NEON kernel to perform Edge tracing */
-class NEEdgeTraceKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEEdgeTraceKernel";
- }
- /** Default constructor */
- NEEdgeTraceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default;
-    /** Default destructor */
- ~NEEdgeTraceKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge"
- * @param[in,out] output Destination tensor. Data type supported: U8. Must be initialized to 0 (No edge).
- */
- void configure(ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
- bool is_parallelisable() const override;
-
-private:
- ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECANNYEDGEKERNEL_H */
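For NEGradientKernel the magnitude follows the norm_type passed to configure(); a scalar sketch of the per-pixel computation for S16 inputs (saturation to the U16 output and the exact phase binning consumed by the later non-maxima suppression stage are implementation details not shown here):

#include <cmath>
#include <cstdint>
#include <cstdlib>

// norm_type == 1 selects the L1 norm |gx| + |gy|, anything else the L2 norm sqrt(gx^2 + gy^2),
// matching the configure() documentation above.
uint16_t gradient_magnitude(int16_t gx, int16_t gy, int32_t norm_type)
{
    if(norm_type == 1)
    {
        return static_cast<uint16_t>(std::abs(static_cast<int32_t>(gx)) + std::abs(static_cast<int32_t>(gy)));
    }
    const float m = std::sqrt(static_cast<float>(gx) * gx + static_cast<float>(gy) * gy);
    return static_cast<uint16_t>(m);
}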
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H
-#define ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <array>
-#include <cstdint>
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the channel combine kernel */
-class NEChannelCombineKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEChannelCombineKernel";
- }
- /** Default constructor */
- NEChannelCombineKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelCombineKernel(const NEChannelCombineKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEChannelCombineKernel(NEChannelCombineKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default;
- /** Default destructor */
- ~NEChannelCombineKernel() = default;
-
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
- * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
- * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
- * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8
- * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- */
- void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
- * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
- * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
- * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444
- */
- void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- /** Combine 3 planes to form a three channel single plane tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void combine_3C(const Window &win);
- /** Combine 4 planes to form a four channel single plane tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void combine_4C(const Window &win);
- /** Combine 3 planes to form a single plane YUV tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- template <bool is_yuyv>
- void combine_YUV_1p(const Window &win);
- /** Combine 3 planes to form a two plane YUV tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void combine_YUV_2p(const Window &win);
- /** Combine 3 planes to form a three plane YUV tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void combine_YUV_3p(const Window &win);
- /** Copies a full plane to the output tensor.
- *
-     * @param[in] win      Region on which to execute the kernel.
-     * @param[in] plane_id Id of the plane to copy.
-     */
- void copy_plane(const Window &win, uint32_t plane_id);
- /** Common signature for all the specialised ChannelCombine functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window);
- /** ChannelCombine function to use for the particular tensor types passed to configure() */
- ChannelCombineFunction _func;
- std::array<const ITensor *, 4> _planes;
- ITensor *_output;
- IMultiImage *_output_multi;
- std::array<uint32_t, 3> _x_subsampling;
- std::array<uint32_t, 3> _y_subsampling;
- unsigned int _num_elems_processed_per_iteration;
- bool _is_parallelizable;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H
-#define ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the channel extract kernel */
-class NEChannelExtractKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEChannelExtractKernel";
- }
- /** Default constructor */
- NEChannelExtractKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelExtractKernel(const NEChannelExtractKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEChannelExtractKernel(NEChannelExtractKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default;
- /** Default destructor */
- ~NEChannelExtractKernel() = default;
-
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel Channel to extract.
- * @param[out] output Destination tensor. Format supported: U8
- */
- void configure(const ITensor *input, Channel channel, ITensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel Channel to extract.
- * @param[out] output Single-planar destination image. Format supported: U8
- */
- void configure(const IMultiImage *input, Channel channel, IImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Extract one channel from a two channel planar tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void extract_1C_from_2C_img(const Window &win);
- /** Extract one channel from a three channel planar tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void extract_1C_from_3C_img(const Window &win);
- /** Extract one channel from a four channel planar tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void extract_1C_from_4C_img(const Window &win);
-    /** Extract U/V channel from a single planar YUYV/UYVY tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void extract_YUYV_uv(const Window &win);
- /** Copies a full plane to the output tensor.
- *
- * @param[in] win Region on which to execute the kernel.
- */
- void copy_plane(const Window &win);
- /** Common signature for all the specialised ChannelExtract functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window);
- /** ChannelExtract function to use for the particular tensor types passed to configure() */
- ChannelExtractFunction _func;
- unsigned int _lut_index;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H
-#define ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the channel shuffle kernel */
-class NEChannelShuffleLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEChannelShuffleLayerKernel";
- }
- /** Default constructor */
- NEChannelShuffleLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelShuffleLayerKernel(const NEChannelShuffleLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEChannelShuffleLayerKernel &operator=(const NEChannelShuffleLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEChannelShuffleLayerKernel(NEChannelShuffleLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEChannelShuffleLayerKernel &operator=(NEChannelShuffleLayerKernel &&) = default;
- /** Default destructor */
- ~NEChannelShuffleLayerKernel() = default;
- /** Configure function's inputs and outputs.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- */
- void configure(const ITensor *input, ITensor *output, unsigned int num_groups);
- /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel
- *
- * @param[in] input Input tensor. Data types supported: All
-     * @param[in]  output     Output tensor info. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- unsigned int _num_groups;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H */
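Channel shuffle with G groups views the C channels as a (G, C/G) matrix and transposes it, so source channel g*K + k moves to destination channel k*G + g with K = C/G; a minimal index sketch of that mapping (hypothetical helper):

// Destination channel for a given source channel under a channel shuffle with num_groups groups.
// num_channels must be a multiple of num_groups, as required by configure()/validate() above.
unsigned int shuffled_channel(unsigned int src_channel, unsigned int num_channels, unsigned int num_groups)
{
    const unsigned int K = num_channels / num_groups; // channels per group
    return (src_channel % K) * num_groups + src_channel / K;
}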
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOL2IMKERNEL_H
-#define ARM_COMPUTE_NECOL2IMKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform col2im reshaping.
- *
- * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel.
- *
- * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
- *
- * @f[
- * \left( \begin{array}{ccccccccc}
- * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccc}
- * a0 & a1 & a2 \\
- * a3 & a4 & a5 \\
- * a6 & a7 & a8 \\
- * \end{array} \right)
- * @f]
- */
-class NECol2ImKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NECol2ImKernel";
- }
- /** Default constructor */
- NECol2ImKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECol2ImKernel(const NECol2ImKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECol2ImKernel &operator=(const NECol2ImKernel &) = delete;
- /** Allow instances of this class to be moved */
- NECol2ImKernel(NECol2ImKernel &&) = default;
- /** Allow instances of this class to be moved */
- NECol2ImKernel &operator=(NECol2ImKernel &&) = default;
- /** Default destructor */
- ~NECol2ImKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Data types supported: All
- * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
- * @param[in] convolved_dims Output convolved dimensions.
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
- /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel
- *
- * @param[in] input The input tensor to convert. Data types supported: All
- * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
- * @param[in] convolved_dims Output convolved dimensions.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the col2im
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_col2im(const Window &window);
-
- /** Common signature for all the specialised col2im functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window);
-
- Col2ImFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- Size2D _convolved_dims;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECOL2IMKERNEL_H */
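Restated in code, the reshape in the example above writes consecutive column elements row by row into a width x height block; a scalar sketch for a single output map (hypothetical helper; out_stride stands for the destination row stride, which in general differs from the block width):

#include <cstddef>

// Lays a row vector of width * height elements out as a (height x width) image block.
void col2im_block(const float *column, float *image, size_t width, size_t height, size_t out_stride)
{
    for(size_t y = 0; y < height; ++y)
    {
        for(size_t x = 0; x < width; ++x)
        {
            image[y * out_stride + x] = column[y * width + x];
        }
    }
}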
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_COLORCONVERTKERNEL_H
-#define ARM_COMPUTE_COLORCONVERTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class IMultiImage;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the color convert kernel */
-class NEColorConvertKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEColorConvertKernel";
- }
- /** Default constructor */
- NEColorConvertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEColorConvertKernel(const NEColorConvertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEColorConvertKernel(NEColorConvertKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default;
- /** Default destructor */
- ~NEColorConvertKernel() = default;
-
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
-     *                    RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
-     *                    U8 (if the format of @p input is RGB888)
- */
- void configure(const ITensor *input, ITensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const IMultiImage *input, IImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
-     * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
- */
- void configure(const IImage *input, IMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const IMultiImage *input, IMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win);
- const void *_input;
- void *_output;
- ColorConvertFunction *_func;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_COLORCONVERTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
-#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa.
- *
- * @note This function can be applied to the 2D weights used by a Fully Connected layer if:
- * - It follows a Convolution layer
- * - The data layout used by the network does not match the one the model has been trained in.
- *
- * @note This function assumes the weights are already reshaped (transposed)
- */
-class NEConvertFullyConnectedWeightsKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvertFullyConnectedWeightsKernel";
- }
- /** Default constructor */
- NEConvertFullyConnectedWeightsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvertFullyConnectedWeightsKernel(const NEConvertFullyConnectedWeightsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvertFullyConnectedWeightsKernel &operator=(const NEConvertFullyConnectedWeightsKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEConvertFullyConnectedWeightsKernel(NEConvertFullyConnectedWeightsKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEConvertFullyConnectedWeightsKernel &operator=(NEConvertFullyConnectedWeightsKernel &&) = default;
- /** Default destructor */
- ~NEConvertFullyConnectedWeightsKernel() = default;
- /** Set the input and output tensor.
- *
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
- * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel
- *
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
- * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the permute
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_convert_fc_weights(const Window &window);
-
- const ITensor *_input;
- ITensor *_output;
-    unsigned int   _factor1; /* equals the number of elements per original input plane if @p data_layout == NCHW; the number of channels otherwise */
-    unsigned int   _factor2; /* equals the number of elements per original input plane if @p data_layout == NHWC; the number of channels otherwise */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */
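The _factor1/_factor2 members above encode a single row permutation: each 2D weight row corresponds to one element of the original input volume, flattened as c*H*W + h*W + w in NCHW and as h*W*C + w*C + c in NHWC, and with the factors chosen as documented the same formula converts in either direction. A sketch of that mapping (hypothetical free function for illustration):

// Maps a weight-row index from the trained layout to the converted one.
// For weights trained in NCHW, factor1 = elements per input plane (H * W) and factor2 = number of
// channels; for NHWC the roles swap, exactly as the member comments above describe.
unsigned int converted_row(unsigned int row, unsigned int factor1, unsigned int factor2)
{
    return (row % factor1) * factor2 + row / factor1;
}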
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H
-#define ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** NEON kernel to convert asymmetric unsigned to asymmetric signed and vice-versa */
-class NEConvertQuantizedSignednessKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvertQuantizedSignednessKernel";
- }
- /** Default constructor */
- NEConvertQuantizedSignednessKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEConvertQuantizedSignednessKernel(const NEConvertQuantizedSignednessKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEConvertQuantizedSignednessKernel &operator=(const NEConvertQuantizedSignednessKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEConvertQuantizedSignednessKernel(NEConvertQuantizedSignednessKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEConvertQuantizedSignednessKernel &operator=(NEConvertQuantizedSignednessKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data types supported: opposite of @p input.
- */
- void configure(const ITensor *input, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEConvertQuantizedSignednessKernel
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data types supported: opposite of @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H */
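Converting between QASYMM8 and QASYMM8_SIGNED keeps the scale and shifts the zero point by 128, so per element the kernel's job reduces to a constant offset (equivalently, flipping the most significant bit); a sketch under that standard assumption:

#include <cstdint>

// An unsigned asymmetric value u with zero point o represents the same real number as the signed
// value u - 128 with zero point o - 128 and the same scale, and vice versa.
int8_t qasymm8_to_signed(uint8_t value)
{
    return static_cast<int8_t>(static_cast<int32_t>(value) - 128);
}

uint8_t qasymm8_signed_to_unsigned(int8_t value)
{
    return static_cast<uint8_t>(static_cast<int32_t>(value) + 128);
}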
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H
-#define ARM_COMPUTE_NECONVOLUTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-#include <array>
-#include <cstdint>
-#include <vector>
-
-namespace arm_compute
-{
-class ITensor;
-
-/****************************************************************************************\
- * Square Convolution *
-\****************************************************************************************/
-
-/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
- * The client can supply a convolution matrix \f$ C_{m,n} \f$.
- * @f{eqnarray}{
- * k_0 &=& \frac{m}{2} \\
- * l_0 &=& \frac{n}{2} \\
- * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
- * @f}
- *
- * @note The above equation for this function is similar to the default OpenCV Filter2D function,
- * which actually computes a correlation and not a convolution.
- *       For a true convolution, the convolution matrix should be flipped both horizontally and vertically.
- */
-template <unsigned int matrix_size>
-class NEConvolutionKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvolutionKernel";
- }
- /** Default constructor */
- NEConvolutionKernel();
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- template <typename OutputType>
- void convolution(const Window &win);
-
-protected:
- uint32_t _scale; /**< scale of the convolution */
- std::array<int16_t, matrix_size *matrix_size> _convolution; /**< convolution matrix */
-};
-
-/** Interface for the kernel which applies a 3x3 convolution to a tensor.*/
-using NEConvolution3x3Kernel = NEConvolutionKernel<3>;
-/** Interface for the kernel which applies a 5x5 convolution to a tensor.*/
-using NEConvolution5x5Kernel = NEConvolutionKernel<5>;
-/** Interface for the kernel which applies a 7x7 convolution to a tensor.*/
-using NEConvolution7x7Kernel = NEConvolutionKernel<7>;
-/** Interface for the kernel which applies a 9x9 convolution to a tensor.*/
-using NEConvolution9x9Kernel = NEConvolutionKernel<9>;
-
-/****************************************************************************************\
- * Separable Square Convolution *
-\****************************************************************************************/
-
-/** Kernel for the Horizontal pass of a Separable Convolution */
-template <unsigned int matrix_size>
-class NESeparableConvolutionHorKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NESeparableConvolutionHorKernel";
- }
- /** Default constructor */
- NESeparableConvolutionHorKernel();
-
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data types supported: U16, S16, S32.
- * @param[in] conv_row Convolution matrix to apply to the input tensor.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
-    /** Apply the object's convolution to the given window of the input tensor.
- *
- * @param[in] window Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolve(const Window &window);
-
- std::array<int16_t, matrix_size> _conv_row; /**< Convolution coefficients */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel which applies a 5x1 horizontal convolution to a tensor.*/
-using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>;
-/** Interface for the kernel which applies a 7x1 horizontal convolution to a tensor.*/
-using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>;
-/** Interface for the kernel which applies a 9x1 horizontal convolution to a tensor.*/
-using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>;
-
-/** Kernel for the Vertical pass of a Separable Convolution */
-template <unsigned int matrix_size>
-class NESeparableConvolutionVertKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NESeparableConvolutionVertKernel";
- }
- /** Default constructor */
- NESeparableConvolutionVertKernel();
-
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U16, S16, S32.
-     * @param[out] output           Destination tensor. Data types supported: U8, S16.
- * @param[in] conv_col Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Apply the object's convolution to the given window of the input tensor.
- * This function is used if the intermediate values have been stored as U16.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolution_u16(const Window &win);
- /** Apply the object's convolution to the given window of the input tensor.
- * This function is used if the intermediate values have been stored as S16.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolution_s16(const Window &win);
- /** Apply the object's convolution to the given window of the input tensor.
- * This function is used if the intermediate values have been stored as S32.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolution_s32(const Window &win);
-
- std::array<int16_t, matrix_size> _conv_col; /**< Convolution coefficients */
- uint32_t _scale; /**< Convolution's scale */
-};
-
-/** Interface for the kernel which applies a 1x5 vertical convolution to a tensor.*/
-using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>;
-/** Interface for the kernel which applies a 1x7 vertical convolution to a tensor.*/
-using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>;
-/** Interface for the kernel which applies a 1x9 vertical convolution to a tensor.*/
-using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>;
-
-/****************************************************************************************\
- * Rectangle Convolution *
-\****************************************************************************************/
-
-/** Kernel for running a convolution on a rectangular matrix.
- *
- * @note Supported matrix dimensions are combinations of 3, 5, 7 and 9.
- */
-class NEConvolutionRectangleKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvolutionRectangleKernel";
- }
- /** Default constructor */
- NEConvolutionRectangleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
-     * @param[out] output            Destination tensor. Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] width Width of convolution matrix (Number of columns)
- * @param[in] height Height of convolution matrix (Number of rows)
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- unsigned int get_index(uint32_t val);
- /** Apply the object's convolution to the given window of the input tensor.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType, unsigned int rows, unsigned int cols>
- void convolution(const Window &win);
-
-protected:
- const ITensor *_input; /**< Input tensor */
- ITensor *_output; /**< Output tensor */
- uint32_t _scale; /**< Scale of the convolution */
- std::vector<int16_t> _convolution; /**< Convolution matrix */
- BorderSize _border_size; /**< Calculated border width */
- uint32_t _func_idx; /**< Index used to specify convolution function to be used */
- const static unsigned int _nr_supported_sizes
- {
- 4
- }; /**< Number of supported permutations */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */
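
A hedged fragment showing how the square-convolution interface above is typically configured; it assumes a U8 tensor `src` and an S16 tensor `dst` already initialised as in the earlier sketch:

// 3x3 averaging (box blur) coefficients; passing scale 0 lets the kernel use the coefficient sum (9) instead.
const std::array<int16_t, 9> conv = { 1, 1, 1, 1, 1, 1, 1, 1, 1 };

NEConvolution3x3Kernel conv3x3;
conv3x3.configure(&src, &dst, conv.data(), 0 /* scale */, false /* border_undefined */);
NEScheduler::get().schedule(&conv3x3, Window::DimY);
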
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECOPYKERNEL_H
-#define ARM_COMPUTE_NECOPYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a copy between two tensors */
-class NECopyKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NECopyKernel";
- }
- /** Default constructor */
- NECopyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NECopyKernel(const NECopyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NECopyKernel &operator=(const NECopyKernel &) = delete;
- /** Allow instances of this class to be moved */
- NECopyKernel(NECopyKernel &&) = default;
- /** Allow instances of this class to be moved */
- NECopyKernel &operator=(NECopyKernel &&) = default;
-    /** Initialize the kernel's input and output.
- *
- * @param[in] input Source tensor. Data types supported: All
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] padding (Optional) Padding to be applied to the input tensor
- */
- void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList());
- /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel
- *
- * @param[in] input Source tensor. Data types supported: All
- * @param[in] output Destination tensor. Data types supported: same as @p input.
- * @param[in] padding (Optional) Padding to be applied to the input tensor
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList());
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- PaddingList _padding;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECOPYKERNEL_H */
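
A short fragment for the copy kernel above; `src` and `dst` stand for any two identically shaped tensors of the same data type (placeholder names):

NECopyKernel copy;
ARM_COMPUTE_ERROR_THROW_ON(NECopyKernel::validate(src.info(), dst.info())); // empty PaddingList by default
copy.configure(&src, &dst);
NEScheduler::get().schedule(&copy, Window::DimY);
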
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEON_CROP_KERNEL_H
-#define ARM_COMPUTE_NEON_CROP_KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to perform tensor cropping */
-class NECropKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NECropKernel";
- }
- /** Default constructor */
- NECropKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECropKernel(const NECropKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECropKernel &operator=(const NECropKernel &) = delete;
- /** Allow instances of this class to be moved */
- NECropKernel(NECropKernel &&) = default;
- /** Allow instances of this class to be moved */
- NECropKernel &operator=(NECropKernel &&) = default;
- /** Default destructor */
- ~NECropKernel() = default;
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- * @note Padding not supported.
- *
- * @param[in] input Source tensor. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
- * @param[in] crop_boxes Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values.
- * Data type supported: F32
- * @param[in] box_ind One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input.
- * Data type supported: F32
- * @param[out] output Destination tensor. Data type supported: F32
- * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- */
- void configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0);
-
-    /** Static function to check if given info will lead to a valid configuration of @ref NECropKernel
- *
- * @note Supported tensor rank: up to 4
- * @note Padding not supported.
- *
- * @param[in] input Source tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
- * @param[in] crop_boxes Tensor info for tensor containing all possible boxes used to crop the image. Data type supported: F32
- * @param[in] box_ind Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image
- * in @p input. Data type supported: F32
-     * @param[in]  output              Destination tensor info. Data type supported: F32
- * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0);
-
- /** Configure output tensor's shape as this can only be determined at runtime. */
- void configure_output_shape();
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Function to use for in bounds crop for the particular tensor types passed to configure() */
- using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t, bool, bool);
-
-private:
- const ITensor *_input;
- const ITensor *_crop_boxes;
- const ITensor *_box_ind;
- ITensor *_output;
-
- Coordinates _start;
- Coordinates _end;
- uint32_t _crop_box_ind;
- float _extrapolation_value;
- /** The number of rows out of bounds at the start and end of output. */
- std::array<uint32_t, 2> _rows_out_of_bounds;
- /** The number of columns out of bounds at the start and end of output. */
- std::array<uint32_t, 2> _cols_out_of_bounds;
-
- NECropKernel::InBoundsCropFunction *_in_bounds_crop_function;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEON_CROP_KERNEL_H */
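
A hedged fragment for the crop kernel above. `input_nhwc`, `crop_boxes`, `box_ind` and `output` are placeholder tensors laid out as the parameter documentation describes; the extra configure_output_shape() step reflects the note that the output shape is only known at run time:

NECropKernel crop;
crop.configure(&input_nhwc, &crop_boxes, &box_ind, &output, 0 /* crop_box_ind */, 0.0f /* extrapolation_value */);
// crop_boxes and box_ind must be populated before this point, since the output shape depends on the box values.
crop.configure_output_shape();
output.allocator()->allocate();
NEScheduler::get().schedule(&crop, Window::DimY);
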
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H
-#define ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class IDistribution1D;
-class ILut;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the cumulative distribution (cumulative summation) calculation kernel.
- *
- * This kernel calculates the cumulative sum of a given distribution (meaning that each output element
- * is the sum of all its previous elements including itself) and creates a lookup table with the normalized
- * pixel intensities, which is used to improve the contrast of the image.
- */
-class NECumulativeDistributionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NECumulativeDistributionKernel";
- }
- /** Default constructor */
- NECumulativeDistributionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default;
- /** Set the input and output distribution.
- *
- * @param[in] input Input image. Data type supported: U8
- * @param[in] distribution Unnormalized 256-bin distribution of the input image.
-     * @param[out] cumulative_sum Cumulative distribution (summed histogram). Should be same size as @p distribution.
- * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements.
- */
- void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- const IImage *_input; /**< Input image. */
- const IDistribution1D *_distribution; /**< Input histogram of the input image. */
-    IDistribution1D       *_cumulative_sum; /**< The cumulative distribution. */
- ILut *_output; /**< Output with the equalization lookup table. */
-private:
- static const uint32_t _histogram_size = 256; /**< Default histogram size of 256. */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H */
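
The normalisation described above can be sketched in plain C++ as a reference illustration; this is the textbook histogram-equalisation mapping, not necessarily the kernel's exact arithmetic:

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>

// hist: unnormalised 256-bin histogram of a U8 image (assumed already computed).
std::array<uint8_t, 256> make_equalization_lut(const std::array<uint32_t, 256> &hist)
{
    std::array<uint32_t, 256> cum_sum{};
    uint32_t sum = 0;
    for(size_t i = 0; i < hist.size(); ++i)
    {
        sum += hist[i];
        cum_sum[i] = sum; // each entry is the sum of all previous bins, including itself
    }

    std::array<uint8_t, 256> lut{};
    const uint32_t total   = cum_sum.back();
    const auto     nonzero = std::find_if(cum_sum.begin(), cum_sum.end(), [](uint32_t v) { return v > 0; });
    const uint32_t cdf_min = (nonzero != cum_sum.end()) ? *nonzero : 0;
    if(total > cdf_min)
    {
        for(size_t i = 0; i < lut.size(); ++i)
        {
            // Map the normalised cumulative value onto the full U8 range.
            const double num = (cum_sum[i] > cdf_min) ? double(cum_sum[i] - cdf_min) : 0.0;
            lut[i]           = static_cast<uint8_t>(std::lround(255.0 * num / double(total - cdf_min)));
        }
    }
    return lut;
}
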
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the depth concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEDepthConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthConcatenateLayerKernel";
- }
- /** Default constructor */
- NEDepthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEDepthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
- *
-     * @note The output tensor's two lowest dimensions can't be smaller than the input's.
-     * @note The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window);
-
-private:
- DepthConcatFunction *_func;
- unsigned int _depth_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H */
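
Unlike the older kernels in this patch, the concatenate kernel above is configured on ITensorInfo objects and receives the actual tensors at run time through an ITensorPack. A hedged fragment; the ACL_SRC/ACL_DST identifiers and the single-threaded run_op call are assumptions about the API of this era:

NEDepthConcatenateLayerKernel concat;
concat.configure(src.info(), depth_offset, dst.info()); // depth_offset: where this input starts along Z in the output

ITensorPack pack;
pack.add_tensor(TensorType::ACL_SRC, &src);
pack.add_tensor(TensorType::ACL_DST, &dst);
concat.run_op(pack, concat.window(), ThreadInfo{}); // single-threaded execution over the full window
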
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_DEPTHCONVERTKERNEL_H
-#define ARM_COMPUTE_DEPTHCONVERTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Depth conversion kernel
- * This function ignores the scale and zero point of quantized tensors, i.e. QASYMM8 input is treated as uint8 values.
- */
-class NEDepthConvertLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthConvertLayerKernel";
- }
- /** Default constructor*/
- NEDepthConvertLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete;
- /** Default move constructor */
- NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete;
- /** Default move assignment operator */
- NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default;
- /** Set the input and output of the kernel
- *
- * Valid conversions Input -> Output :
- *
- * - QASYMM8_SIGNED -> S16, S32, F32, F16
- * - QASYMM8 -> U16, S16, S32, F32, F16
- * - U8 -> U16, S16, S32, F32, F16
- * - U16 -> U8, U32
- * - S16 -> QASYMM8_SIGNED, U8, S32
- * - BFLOAT16 -> F32
- * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
- * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
- * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8
- *
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32.
- * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32.
- * @param[in] policy Conversion policy.
- * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
- */
- void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32.
- * @param[in] policy Conversion policy
- * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- ConvertPolicy _policy;
- uint32_t _shift;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_DEPTHCONVERTKERNEL_H */
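
A short fragment for one of the valid conversions listed above (QASYMM8 -> F32); `src_q8` and `dst_f32` are placeholder tensors of matching shape:

NEDepthConvertLayerKernel convert;
ARM_COMPUTE_ERROR_THROW_ON(NEDepthConvertLayerKernel::validate(src_q8.info(), dst_f32.info(), ConvertPolicy::SATURATE, 0));
convert.configure(&src_q8, &dst_f32, ConvertPolicy::SATURATE, 0 /* shift */);
NEScheduler::get().schedule(&convert, Window::DimY);
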
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H
-#define ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the depth to space kernel */
-class NEDepthToSpaceLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthToSpaceLayerKernel";
- }
- /** Default constructor */
- NEDepthToSpaceLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthToSpaceLayerKernel(const NEDepthToSpaceLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthToSpaceLayerKernel &operator=(const NEDepthToSpaceLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthToSpaceLayerKernel(NEDepthToSpaceLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthToSpaceLayerKernel &operator=(NEDepthToSpaceLayerKernel &&) = default;
- /** Default destructor */
- ~NEDepthToSpaceLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape x value.
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
- DataLayout _data_layout; /**< Data layout of the operation */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H */
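
A hedged fragment for the depth-to-space kernel above; the shapes are illustrative and use the NHWC ordering [C, W, H, N] of arm_compute::TensorShape:

// block_shape = 2 moves a 2x2 block of channels into the spatial plane:
// input [C=16, W=8, H=8, N=1] -> output [C=4, W=16, H=16, N=1]
NEDepthToSpaceLayerKernel d2s;
ARM_COMPUTE_ERROR_THROW_ON(NEDepthToSpaceLayerKernel::validate(src.info(), dst.info(), 2));
d2s.configure(&src, &dst, 2 /* block_shape */);
NEScheduler::get().schedule(&d2s, Window::DimY);
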
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "support/Requires.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_neon.h>
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to run a depthwise convolution native on a tensor. */
-class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseConvolutionLayerNativeKernel";
- }
- /** Default constructor */
- NEDepthwiseConvolutionLayerNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Default Move Constructor. */
- NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Default move assignment operator */
- NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Initialize the function's source, destination and parameters.
- *
- * @note Supported data layouts: NHWC
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel
- *
- * @note Supported data layouts: NHWC
- *
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    template <typename T>
-    using FloatEnabler = typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, int>::type;
-
-    template <typename T, typename TW, FloatEnabler<T> = 0>
-    void run_depthwise(const Window &window, bool has_biases);
-
-    template <typename T>
-    using Quantized8bitEnabler = typename std::enable_if<std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int>::type;
-
-    template <typename T, typename TW, Quantized8bitEnabler<T> = 0>
-    void run_depthwise(const Window &window, bool has_biases);
-
- /** Common signature for all the specialised depthwise convolution native functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases);
-
- DepthwiseFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_weights;
- const ITensor *_biases;
- ITensor *_output;
- PadStrideInfo _conv_info;
- unsigned int _depth_multiplier;
- Size2D _dilation;
- std::vector<int> _output_multiplier;
- std::vector<int> _output_shift;
- bool _has_biases;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
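
A hedged fragment for the depthwise kernel above (NHWC only). `src`, `weights`, `biases` and `dst` are placeholder tensors laid out as the parameter documentation describes; the stride and padding values are illustrative:

const PadStrideInfo conv_info(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */);

NEDepthwiseConvolutionLayerNativeKernel dwc;
ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerNativeKernel::validate(
    src.info(), weights.info(), biases.info(), dst.info(), conv_info, 1 /* depth_multiplier */, Size2D(1U, 1U)));
dwc.configure(&src, &weights, &biases, &dst, conv_info, 1, Size2D(1U, 1U));
NEScheduler::get().schedule(&dwc, Window::DimY);
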
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the dequantization layer kernel. */
-class NEDequantizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDequantizationLayerKernel";
- }
- /** Default constructor */
- NEDequantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDequantizationLayerKernel(const NEDequantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default;
- /** Default destructor */
- ~NEDequantizationLayerKernel() = default;
- /** Set input, output tensors.
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[in] output Output tensor info. Data types supported: F16/F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H */
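
A short fragment for the dequantization kernel above; `src_q8` carries its scale and offset in its TensorInfo's QuantizationInfo, and `dst_f32` is a same-shaped F32 tensor (placeholder names):

NEDequantizationLayerKernel deq;
ARM_COMPUTE_ERROR_THROW_ON(NEDequantizationLayerKernel::validate(src_q8.info(), dst_f32.info()));
deq.configure(&src_q8, &dst_f32);
NEScheduler::get().schedule(&deq, Window::DimY);
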
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDERIVATIVEKERNEL_H
-#define ARM_COMPUTE_NEDERIVATIVEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run the derivative along the X/Y directions on a tensor.
- *
- */
-class NEDerivativeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDerivativeKernel";
- }
- /** Default constructor */
- NEDerivativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDerivativeKernel(const NEDerivativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDerivativeKernel(NEDerivativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default;
- /** Initialise the kernel's sources, destination and border
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Function to perform derivative along the X direction on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void derivative_x(const Window &window);
- /** Function to perform derivative along the Y direction on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void derivative_y(const Window &window);
- /** Function to perform derivative along the X and Y direction on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void derivative_xy(const Window &window);
- /** Common signature for all the specialised derivative functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window);
- /** Derivative function to use for the particular tensor types passed to configure() */
- DerivativeFunction _func;
-
-private:
- const ITensor *_input; /**< Input tensor */
-    ITensor       *_output_x; /**< Output tensor - Derivative along the X direction */
-    ITensor       *_output_y; /**< Output tensor - Derivative along the Y direction */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDERIVATIVEKERNEL_H */
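
A short fragment for the derivative kernel above; both gradients are requested here, but either output may instead be nullptr as long as at least one is set (placeholder tensors: U8 `src`, S16 `grad_x` and `grad_y`):

NEDerivativeKernel deriv;
deriv.configure(&src, &grad_x, &grad_y, false /* border_undefined */);
NEScheduler::get().schedule(&deriv, Window::DimY);
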
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDILATEKERNEL_H
-#define ARM_COMPUTE_NEDILATEKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform boolean image dilation */
-class NEDilateKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEDilateKernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDILATEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON interface for Direct Convolution Layer kernel */
-class NEDirectConvolutionLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDirectConvolutionLayerKernel";
- }
- /** Default constructor */
- NEDirectConvolutionLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default;
- /** Default destructor */
- ~NEDirectConvolutionLayerKernel() = default;
- /** Set the input, weights, and output tensors.
- *
- * @note: DirectConvolution only works in the following configurations:
- * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
- * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3
- *
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
-     *                       Data type supported: Same as @p input.
- * @param[out] output Output tensor.
-     *                       The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel
- *
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
-     *                       Data type supported: Same as @p input.
- * @param[in] output Output tensor.
-     *                       The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /* Template function for optimized convolution NHWC */
- template <typename T>
- void convolve_nhwc_optimized(const Window &window);
-
- /* Template function for convolution NHWC */
- template <typename T>
- void convolve_nhwc(const Window &window);
-
- const ITensor *_input;
- const ITensor *_weights;
- ITensor *_output;
- PadStrideInfo _conv_info;
- BorderSize _border_size;
- unsigned int _kernel_size;
- unsigned int _num_weight_elems_read_per_row;
- unsigned int _num_elems_read_per_iteration;
- unsigned int _num_elems_written_per_iteration;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H */
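
A hedged fragment for the direct convolution kernel above, using one of the supported configurations listed in the documentation (3x3, stride 1); the padding values and tensor names are illustrative:

const PadStrideInfo conv_info(1, 1, 1, 1); // stride_x, stride_y, pad_x, pad_y

NEDirectConvolutionLayerKernel direct_conv;
ARM_COMPUTE_ERROR_THROW_ON(NEDirectConvolutionLayerKernel::validate(src.info(), weights.info(), dst.info(), conv_info));
direct_conv.configure(&src, &weights, &dst, conv_info);
NEScheduler::get().schedule(&direct_conv, Window::DimY);
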
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H
-#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
- *
- * @note We assume bias to be shared
- * @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part
- * of the @ref DirectConvolutionLayerOutputStageKernelInfo.
- */
-class NEDirectConvolutionLayerOutputStageKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDirectConvolutionLayerOutputStageKernel";
- }
- /** Default constructor */
- NEDirectConvolutionLayerOutputStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default;
- /** Default destructor */
- ~NEDirectConvolutionLayerOutputStageKernel() = default;
- /** Set the accumulate buffer and the biases of the kernel.
- *
- * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: F16/F32/S32
- * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
- * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr.
- * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32
- * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata
- */
- void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr,
- const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel
- *
- * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
- * Data type supported: F16/F32/S32
- * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
- * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
- * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr.
- * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32
- * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr,
- const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
- int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias);
-
-private:
- OutputStageKernel *_func;
- ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- int _result_fixedpoint_multiplier;
- int _result_shift;
- int _result_offset_after_shift;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H */
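The in-place versus out-of-place rules documented above are easy to get wrong, so here is a minimal sketch that exercises the static validate() declared in this header for both the float and the quantized case. The shapes, the helper function name and the include path (the pre-removal public location) are illustrative assumptions, not taken from the library's tests.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" // assumed pre-removal path
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

bool check_output_stage_configs()
{
    // In-place accumulation: F32 accumulator plus an optional 1D shared bias, no explicit output.
    const TensorInfo acc(TensorShape(16U, 16U, 8U), 1, DataType::F32);
    const TensorInfo bias(TensorShape(8U), 1, DataType::F32);
    const Status in_place = NEDirectConvolutionLayerOutputStageKernel::validate(&acc, &bias);

    // Quantized case: an S32 accumulator must be written out-of-place, e.g. to QASYMM8,
    // with the requantization parameters carried by the kernel descriptor.
    const TensorInfo acc_s32(TensorShape(16U, 16U, 8U), 1, DataType::S32);
    const TensorInfo out_q8(TensorShape(16U, 16U, 8U), 1, DataType::QASYMM8);
    DirectConvolutionLayerOutputStageKernelInfo desc{}; // defaults only; real code fills multiplier/shift/output type
    const Status quantized = NEDirectConvolutionLayerOutputStageKernel::validate(&acc_s32, nullptr, &out_q8, desc);

    return in_place.error_code() == ErrorCode::OK && quantized.error_code() == ErrorCode::OK;
}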
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
-#define ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for an element-wise operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f]
- *
- */
-class NEElementwiseOperationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEElementwiseOperationKernel";
- }
- /** Default constructor */
- NEElementwiseOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default;
- /** Default destructor */
- ~NEElementwiseOperationKernel() = default;
-
- /** Common signature for all the specialised arithmetic functions
- *
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Dependent on subclass.
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-protected:
- /** Validate the argument passed to the kernel
- *
- * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Dependent on subclass.
- */
- static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-
-    /** Common configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff)
- *
- */
- void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
-
- /** Function to use for the particular tensor types passed to configure() */
- std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)> _function;
-
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
-};
-
-class NEArithmeticOperationKernel : public NEElementwiseOperationKernel
-{
-public:
- /** Default constructor */
- NEArithmeticOperationKernel() = default;
-
- /** Configure kernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
- */
- void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a Status
- */
- static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-};
-
-class NEDivisionOperationKernel : public NEArithmeticOperationKernel
-{
-public:
- /** Default constructor */
- NEDivisionOperationKernel() = default;
-
- /** Configure kernel
- *
- * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-};
-
-class NEPowerOperationKernel : public NEArithmeticOperationKernel
-{
-public:
- /** Default constructor */
- NEPowerOperationKernel() = default;
-
- /** Configure kernel
- *
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
- */
- void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-};
-
-class NEComparisonOperationKernel : public NEElementwiseOperationKernel
-{
-public:
- /** Default constructor */
- NEComparisonOperationKernel() = default;
-
- /** Configure kernel
- *
- * @param[in] op Comparison operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: U8.
- */
- void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
- *
- * @param[in] op Comparison operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: U8.
- *
- * @return a Status
- */
- static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H */
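Because the output data type rules differ between the arithmetic and comparison subclasses above (same type as the inputs versus U8), a short sketch of the static validate() calls may help. Shapes, the function name and the include path are assumptions, not taken from the library's tests.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" // assumed pre-removal path
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

bool check_elementwise_configs()
{
    const TensorInfo a(TensorShape(8U, 4U), 1, DataType::F32);
    const TensorInfo b(TensorShape(8U, 4U), 1, DataType::F32);

    // Arithmetic operations keep the input data type on the output.
    const TensorInfo sum(TensorShape(8U, 4U), 1, DataType::F32);
    const Status s_add = NEArithmeticOperationKernel::validate(ArithmeticOperation::ADD, &a, &b, &sum);

    // Comparison operations always produce a U8 output.
    const TensorInfo mask(TensorShape(8U, 4U), 1, DataType::U8);
    const Status s_cmp = NEComparisonOperationKernel::validate(ComparisonOperation::Greater, &a, &b, &mask);

    return s_add.error_code() == ErrorCode::OK && s_cmp.error_code() == ErrorCode::OK;
}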
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
-#define ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for an element-wise unary operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x) = OP(input(x))@f]
- *
- */
-class NEElementwiseUnaryKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEElementwiseUnaryKernel";
- }
- /** Default constructor */
- NEElementwiseUnaryKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernel(const NEElementwiseUnaryKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernel &operator=(const NEElementwiseUnaryKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernel(NEElementwiseUnaryKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernel &operator=(NEElementwiseUnaryKernel &&) = default;
- /** Default destructor */
- ~NEElementwiseUnaryKernel() = default;
-
- /** Function to configure the @ref NEElementwiseUnaryKernel
- *
- * @param[in] op Arithmetic operation to be executed.
-     * @param[in]  input  First tensor input. Data types supported: F16/F32 (F16/F32/S32 for NEG/ABS operations).
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ElementWiseUnary op, const ITensor *input, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel
- *
- * @param[in] op Arithmetic operation to be executed.
-     * @param[in] input  First tensor input info. Data types supported: F16/F32 (F16/F32/S32 for NEG/ABS operations).
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a Status
- */
- static Status validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised arithmetic functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window);
-
- /** Template function to run elementwise unary operation
- *
- * @tparam ScalarType Scalar datatype
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename ScalarType>
- void elementwise_op(const Window &window);
-
- ElementwiseUnaryPtr _func;
- const ITensor *_input;
- ITensor *_output;
- ElementWiseUnary _op;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */
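As a small illustration of the note above that S32 is accepted only for the NEG/ABS operations, a minimal validate() sketch follows; the shape, function name and include path are assumptions.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" // assumed pre-removal path
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

bool check_unary_neg_s32()
{
    // NEG is one of the operations documented above as also accepting S32.
    const TensorInfo in(TensorShape(32U), 1, DataType::S32);
    const TensorInfo out(TensorShape(32U), 1, DataType::S32);
    return NEElementwiseUnaryKernel::validate(ElementWiseUnary::NEG, &in, &out).error_code() == ErrorCode::OK;
}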
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEERODEKERNEL_H
-#define ARM_COMPUTE_NEERODEKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform boolean image erosion */
-class NEErodeKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEErodeKernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEERODEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H
-#define ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the digit reverse operation kernel. */
-class NEFFTDigitReverseKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFFTDigitReverseKernel";
- }
- /** Constructor */
- NEFFTDigitReverseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTDigitReverseKernel(const NEFFTDigitReverseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTDigitReverseKernel &operator=(const NEFFTDigitReverseKernel &) = delete;
- /** Default Move Constructor. */
- NEFFTDigitReverseKernel(NEFFTDigitReverseKernel &&) = default;
- /** Default move assignment operator */
- NEFFTDigitReverseKernel &operator=(NEFFTDigitReverseKernel &&) = default;
- /** Default destructor */
- ~NEFFTDigitReverseKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor).
- * @param[in] idx Digit reverse index tensor. Data type supported: U32
- * @param[in] config Kernel configuration.
- */
- void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEFFTDigitReverseKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor).
- * @param[in] idx Digit reverse index tensor info. Data type supported: U32
- * @param[in] config Kernel configuration
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using NEFFTDigitReverseKernelFunctionPtr = void (NEFFTDigitReverseKernel::*)(const Window &window);
-
- template <bool is_input_complex, bool is_conj>
- void digit_reverse_kernel_axis_0(const Window &window);
-
- template <bool is_input_complex, bool is_conj>
- void digit_reverse_kernel_axis_1(const Window &window);
-
- NEFFTDigitReverseKernelFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_idx;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H */
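The channel rules above (1- or 2-channel input, always 2-channel output, U32 index tensor) are illustrated by this sketch of the static validate(). The descriptor field name and the include path are assumptions about the contemporaneous KernelDescriptors.h, not quotations from it.

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" // assumed pre-removal path
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

Status check_digit_reverse_config()
{
    const TensorInfo src(TensorShape(128U, 8U), 1, DataType::F32); // real input: 1 channel
    const TensorInfo dst(TensorShape(128U, 8U), 2, DataType::F32); // output is always complex: 2 channels
    const TensorInfo idx(TensorShape(128U), 1, DataType::U32);     // digit-reverse indices

    FFTDigitReverseKernelInfo cfg{};
    cfg.axis = 0; // assumed field name; reverse along the first dimension
    return NEFFTDigitReverseKernel::validate(&src, &dst, &idx, cfg);
}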
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H
-#define ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <arm_neon.h>
-#include <set>
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the FFT kernel. */
-class NEFFTRadixStageKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFFTRadixStageKernel";
- }
- /** Constructor */
- NEFFTRadixStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTRadixStageKernel(const NEFFTRadixStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTRadixStageKernel &operator=(const NEFFTRadixStageKernel &) = delete;
- /** Default Move Constructor. */
- NEFFTRadixStageKernel(NEFFTRadixStageKernel &&) = default;
- /** Default move assignment operator */
- NEFFTRadixStageKernel &operator=(NEFFTRadixStageKernel &&) = default;
- /** Default destructor */
- ~NEFFTRadixStageKernel() = default;
- /** Set the input and output tensors.
- *
- * @note If the output tensor is nullptr, the FFT will be performed in-place
- *
- * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: same as @p input.
- * @param[in] config FFT descriptor metadata.
- */
- void configure(ITensor *input, ITensor *output, const FFTRadixStageKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFFTRadixStageKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: same as @p input.
- * @param[in] config FFT descriptor metadata.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config);
-    /** Returns the radix values that are supported by the FFT kernel
-     *
-     * @return A set of supported radix values
- */
- static std::set<unsigned int> supported_radix();
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- ITensor *_input;
- ITensor *_output;
- bool _run_in_place;
- unsigned int _Nx;
- unsigned int _axis;
- unsigned int _radix;
-
- void set_radix_stage_axis0(const FFTRadixStageKernelInfo &config);
- void set_radix_stage_axis1(const FFTRadixStageKernelInfo &config);
-
- using FFTFunctionPointerAxis0 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int)>;
- using FFTFunctionPointerAxis1 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int, unsigned int)>;
-
- FFTFunctionPointerAxis0 _func_0;
- FFTFunctionPointerAxis1 _func_1;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H */
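supported_radix() above exists so that callers can decompose an FFT length into per-stage radix values. The sketch below shows one (assumed, greedy) way to do that; it is not the library's own decomposition logic.

#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" // assumed pre-removal path

#include <set>
#include <vector>

// Greedily factor an FFT length into stages whose radix the kernel supports.
// Returns an empty vector if the greedy factorisation does not reach 1.
std::vector<unsigned int> decompose_fft_length(unsigned int length)
{
    const std::set<unsigned int> radixes = arm_compute::NEFFTRadixStageKernel::supported_radix();
    std::vector<unsigned int>    stages;
    auto                         it = radixes.rbegin(); // try the largest radix first
    while (length > 1U && it != radixes.rend())
    {
        if (length % *it == 0U)
        {
            stages.push_back(*it);
            length /= *it;
        }
        else
        {
            ++it;
        }
    }
    return (length == 1U) ? stages : std::vector<unsigned int>{};
}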
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFFTSCALEKERNEL_H
-#define ARM_COMPUTE_NEFFTSCALEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the inverse fft scale kernel. */
-class NEFFTScaleKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFFTScaleKernel";
- }
- /** Constructor */
- NEFFTScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTScaleKernel(const NEFFTScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTScaleKernel &operator=(const NEFFTScaleKernel &) = delete;
- /** Default Move Constructor. */
- NEFFTScaleKernel(NEFFTScaleKernel &&) = default;
- /** Default move assignment operator */
- NEFFTScaleKernel &operator=(NEFFTScaleKernel &&) = default;
- /** Default destructor */
- ~NEFFTScaleKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[in] config Kernel configuration
- */
- void configure(ITensor *input, ITensor *output, const FFTScaleKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFFTScaleKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[in] config Kernel configuration
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- ITensor *_input;
- ITensor *_output;
- float _scale;
- bool _run_in_place;
- bool _is_conj;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFFTSCALEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFASTCORNERSKERNEL_H
-#define ARM_COMPUTE_NEFASTCORNERSKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** NEON kernel to perform fast corners */
-class NEFastCornersKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFastCornersKernel";
- }
- /** Constructor */
- NEFastCornersKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFastCornersKernel(const NEFastCornersKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFastCornersKernel(NEFastCornersKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default;
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data type supported: U8.
- * @param[out] output Output image. Data type supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
-     * @param[in]  non_max_suppression True if non-maxima suppression is applied, false otherwise.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const IImage *_input; /**< source image */
-    IImage       *_output;              /**< intermediate results */
- uint8_t _threshold; /**< threshold on difference between intensity */
-    bool          _non_max_suppression; /**< true if non-maxima suppression is applied in the next stage */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEFASTCORNERSKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFILLARRAYKERNEL_H
-#define ARM_COMPUTE_NEFILLARRAYKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. */
-class NEFillArrayKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFillArrayKernel";
- }
-    /** Default constructor */
- NEFillArrayKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillArrayKernel(const NEFillArrayKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFillArrayKernel(NEFillArrayKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default;
-    /** Default destructor */
- ~NEFillArrayKernel() = default;
-
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data type supported: U8.
-     * @param[in]  threshold Texels greater than or equal to the threshold will be added to the array.
- * @param[out] output Arrays of keypoints to store the results.
- */
- void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- const IImage *_input;
- IKeyPointArray *_output;
- uint8_t _threshold;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEFILLARRAYKERNEL_H*/
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFILLBORDERKERNEL_H
-#define ARM_COMPUTE_NEFILLBORDERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to fill borders */
-class NEFillBorderKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFillBorderKernel";
- }
- /** Default Constructor */
- NEFillBorderKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillBorderKernel(const NEFillBorderKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFillBorderKernel(NEFillBorderKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default;
- /** Default destructor */
- ~NEFillBorderKernel() = default;
-
- /** Initialise the function.
- *
- * @note This kernel fills the borders within the XY-planes.
- *
- * @param[in,out] tensor Tensor to process. Data types supported: All.
- * @param[in] border_size Size of the border to fill in elements.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- void fill_replicate_single_channel(const Window &window);
- void fill_constant_value_single_channel(const Window &window);
-
- ITensor *_tensor;
- BorderSize _border_size;
- BorderMode _mode;
- PixelValue _constant_border_value;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFILLBORDERKERNEL_H */
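A minimal runtime sketch of the fill-border kernel described above, assuming the usual Tensor/NEScheduler runtime helpers; the include paths and the exact scheduling call may differ between library versions, so treat this as illustrative only.

#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" // assumed pre-removal path
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void pad_with_replicated_border()
{
    Tensor t;
    t.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
    t.allocator()->allocate();

    // Fill a one-element replicated border around the XY-plane so a later
    // neighbourhood kernel can safely read outside the valid region.
    NEFillBorderKernel fill_border;
    fill_border.configure(&t, BorderSize(1), BorderMode::REPLICATE);
    NEScheduler::get().schedule(&fill_border, Window::DimZ);
}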
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H
-#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the flatten layer kernel. */
-class NEFlattenLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFlattenLayerKernel";
- }
- /** Default constructor */
- NEFlattenLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default;
- /** Default destructor */
- ~NEFlattenLayerKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel
- *
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All
-     * @param[in]  output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */
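The shape rule above is easiest to see with numbers: a [width=4, height=3, depth=2, batches=5] input flattens to [4*3*2, 5] = [24, 5]. A minimal validate() sketch, with shapes, function name and include path as assumptions:

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" // assumed pre-removal path
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

bool check_flatten_shape()
{
    const TensorInfo in(TensorShape(4U, 3U, 2U, 5U), 1, DataType::F32); // [w, h, d, batches]
    const TensorInfo out(TensorShape(24U, 5U), 1, DataType::F32);       // [w*h*d, batches]
    return NEFlattenLayerKernel::validate(&in, &out).error_code() == ErrorCode::OK;
}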
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFLOORKERNEL_H
-#define ARM_COMPUTE_NEFLOORKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a floor operation */
-class NEFloorKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEFloorKernel";
- }
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: F16/F32.
- * @param[out] output Destination tensor. Same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFloorKernel
- *
- * @param[in] input Source tensor info. Data type supported: F16/F32.
- * @param[in] output Destination tensor info. Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFLOORKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** NEON kernel to fuse the batch normalization node to a preceding convolution node */
-class NEFuseBatchNormalizationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFuseBatchNormalizationKernel";
- }
- /** Default constructor */
- NEFuseBatchNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFuseBatchNormalizationKernel(const NEFuseBatchNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFuseBatchNormalizationKernel &operator=(const NEFuseBatchNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFuseBatchNormalizationKernel(NEFuseBatchNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFuseBatchNormalizationKernel &operator=(NEFuseBatchNormalizationKernel &&) = default;
- /** Default destructor */
- ~NEFuseBatchNormalizationKernel() = default;
- /** Set the source, destination of the kernel
- *
- * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
- * @param[out] fused_weights (Optional) Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[out] fused_bias (Optional) Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- */
- void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias,
- const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalizationKernel
- *
- * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights
- * @param[in] fused_weights (Optional) Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[in] fused_bias (Optional) Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input_weights;
- const ITensor *_input_bias;
- const ITensor *_bn_mean;
- const ITensor *_bn_var;
- const ITensor *_bn_gamma;
- const ITensor *_bn_beta;
- ITensor *_fused_weights;
- ITensor *_fused_bias;
- float _epsilon;
- bool _run_in_place_weights;
- bool _run_in_place_bias;
-
- using FuseBatchNormFunction = void(const ITensor *input_weights, const ITensor *input_bias, ITensor *fused_weights, ITensor *fused_bias,
- const ITensor *bn_mean, const ITensor *bn_var, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, const Window &window);
-
- FuseBatchNormFunction *_func;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H */
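For reference, the standard batch-normalization folding identity that a fusion kernel of this kind computes (stated here as background, not quoted from the implementation) is

@f[ W_{fused} = \frac{\gamma\,W}{\sqrt{\sigma^2 + \epsilon}}, \qquad b_{fused} = \beta + \frac{\gamma\,(b - \mu)}{\sqrt{\sigma^2 + \epsilon}} @f]

where @f$ \mu @f$ and @f$ \sigma^2 @f$ are bn_mean and bn_var, @f$ \gamma @f$ and @f$ \beta @f$ are bn_gamma and bn_beta (defaulting to 1 and 0 when passed as nullptr, as documented above), @f$ W @f$ and @f$ b @f$ are the convolution weights and bias, and @f$ \epsilon @f$ is the epsilon parameter.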
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMASSEMBLYBASE_H
-#define ARM_COMPUTE_NEGEMMASSEMBLYBASE_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Base class for GEMM NEON kernels implemented in Assembly. */
-class NEGEMMAssemblyBaseKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMAssemblyBaseKernel";
- }
- /** Constructor */
- NEGEMMAssemblyBaseKernel()
- : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _is_transposed_0(false), _is_transposed_1(false)
- {
- }
-
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMAssemblyBaseKernel(const NEGEMMAssemblyBaseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMAssemblyBaseKernel &operator=(const NEGEMMAssemblyBaseKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMAssemblyBaseKernel(NEGEMMAssemblyBaseKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMAssemblyBaseKernel &operator=(NEGEMMAssemblyBaseKernel &&) = default;
-
- virtual ~NEGEMMAssemblyBaseKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * The computed function is C = a * AxB + b * C.
- *
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32
- * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0
- * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0.
- * @param[out] workspace Space for intermediate results.
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the accumulation.
- * @param[in]      is_transposed_0 (Optional) True if @p input0 is transposed else false. (Defaults to false)
- * @param[in]      is_transposed_1 (Optional) True if @p input1 is transposed else false. (Defaults to false)
- */
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool is_transposed_0 = false, bool is_transposed_1 = false)
- {
- internal_configure(input0, input1, output, workspace, alpha, beta, is_transposed_0, is_transposed_1);
- }
-
-protected:
- virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool _is_transposed_0, bool _is_transposed_1) = 0;
-
- const ITensor *_input0;
- const ITensor *_input1;
- ITensor *_output;
- ITensor *_workspace;
- float _alpha;
- float _beta;
- bool _is_transposed_0;
- bool _is_transposed_1;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMASSEMBLYBASE_H*/
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H
-#define ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to interleave the elements of a matrix
- *
- * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
- *
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccccccccccc}
- * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\
- * \end{array} \right)
- * @f]
- *
- * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
- */
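For illustration, the transform described above can be written as a plain scalar loop. The sketch below is not part of the library; the helper name and the assumption that both dimensions are multiples of 4 are mine.

#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar sketch of the 4x4 interleave: each 4x4 block of the row-major input is
// emitted column by column onto a single output row. Assumes rows and cols are
// multiples of 4 for brevity.
std::vector<uint8_t> interleave4x4(const std::vector<uint8_t> &src, size_t rows, size_t cols)
{
    std::vector<uint8_t> dst(rows * cols);
    size_t               out = 0;
    for(size_t r = 0; r < rows; r += 4)
    {
        for(size_t c = 0; c < cols; c += 4)
        {
            for(size_t j = 0; j < 4; ++j) // column inside the block
            {
                for(size_t i = 0; i < 4; ++i) // row inside the block
                {
                    dst[out++] = src[(r + i) * cols + (c + j)];
                }
            }
        }
    }
    return dst;
}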
-class NEGEMMInterleave4x4Kernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMInterleave4x4Kernel";
- }
-    /** Constructor */
- NEGEMMInterleave4x4Kernel();
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMInterleave4x4Kernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run gemm interleave 4x4
- *
- * @tparam ScalarType Scalar datatype
- *
- * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
- * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename ScalarType>
- void gemm_interleave4x4(const ITensor *input, ITensor *output, const Window &window);
-
- /** Common signature for all the specialised gemm interleave 4x4 functions
- *
- * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
- * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- using GEMMInterleaveFunctionFuncPtr = void (NEGEMMInterleave4x4Kernel::*)(const ITensor *input, ITensor *output, const Window &window);
-
- GEMMInterleaveFunctionFuncPtr _func;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to multiply matrices
- *
- * @note @ref NEGEMMLowpMatrixMultiplyKernel is the low precision matrix product kernel.
- * This kernel performs the following computation:
- *
- *  -# Convert the values of matrix A from int8 to int32
- *  -# Convert the values of matrix B from int8 to int32
- *  -# Compute the int32 matrix product of the resulting A * B and store the result as int32
- *
- */
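Ignoring the reshaped layouts and the NEON vectorisation, the arithmetic performed by this kernel is a plain widening matrix product. A minimal reference sketch, assuming row-major buffers (the helper name is illustrative only):

#include <cstddef>
#include <cstdint>
#include <vector>

// Widening int8 x int8 -> int32 matrix product: C[M x N] = A[M x K] * B[K x N].
// Layout reshaping (interleave / transpose1xW) and vectorisation are omitted.
std::vector<int32_t> gemmlowp_reference(const std::vector<int8_t> &a, const std::vector<int8_t> &b,
                                        int m, int n, int k)
{
    std::vector<int32_t> c(static_cast<size_t>(m) * n, 0);
    for(int i = 0; i < m; ++i)
    {
        for(int j = 0; j < n; ++j)
        {
            int32_t acc = 0;
            for(int l = 0; l < k; ++l)
            {
                acc += static_cast<int32_t>(a[i * k + l]) * static_cast<int32_t>(b[l * n + j]);
            }
            c[static_cast<size_t>(i) * n + j] = acc;
        }
    }
    return c;
}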
-class NEGEMMLowpMatrixMultiplyKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpMatrixMultiplyKernel";
- }
- /** Constructor */
- NEGEMMLowpMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two
- * kernels change the layout of the original matrices to be more cache-friendly.
- *
- * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the transposed1xW Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- */
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyKernel
- *
- * @param[in] input0 Input tensor info containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor info containing the transposed Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[in] output Output tensor info to store the result of matrix multiplication. Data type supported: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input0;
- const ITensor *_input1;
- ITensor *_output;
- bool _slide_matrix_b;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H*/
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
- * and adds to it the offset contribution of matrix A and matrix B in-place.
- *
- * The final result is:
- *
- * mm_result[i][k] = mm_result[i][k] +
- * (vector_sum_col[k] * a_offset) +
- * (vector_sum_row[i] * b_offset) +
- * (a_offset * b_offset * k)
- *
- */
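The correction above is a per-element update; a scalar sketch over flat row-major buffers (helper name and signature are illustrative, not the kernel's API):

#include <cstddef>
#include <cstdint>
#include <vector>

// In-place offset correction of an [m x n] int32 accumulator:
// mm_result[i][j] += vector_sum_col[j] * a_offset + vector_sum_row[i] * b_offset + a_offset * b_offset * k
void offset_contribution(std::vector<int32_t> &mm_result,
                         const std::vector<int32_t> &vector_sum_col, // length n: sums of the columns of B
                         const std::vector<int32_t> &vector_sum_row, // length m: sums of the rows of A
                         int m, int n, int32_t k, int32_t a_offset, int32_t b_offset)
{
    const int32_t const_term = a_offset * b_offset * k;
    for(int i = 0; i < m; ++i)
    {
        for(int j = 0; j < n; ++j)
        {
            mm_result[static_cast<size_t>(i) * n + j] += vector_sum_col[j] * a_offset + vector_sum_row[i] * b_offset + const_term;
        }
    }
}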
-class NEGEMMLowpOffsetContributionKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpOffsetContributionKernel";
- }
- /** Constructor */
- NEGEMMLowpOffsetContributionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpOffsetContributionKernel(const NEGEMMLowpOffsetContributionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpOffsetContributionKernel &operator=(const NEGEMMLowpOffsetContributionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpOffsetContributionKernel(NEGEMMLowpOffsetContributionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpOffsetContributionKernel &operator=(NEGEMMLowpOffsetContributionKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in, out] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] k Number of matrix A columns or Matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- */
- void configure(ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionKernel
- *
- * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_vector_sum_col;
- const ITensor *_vector_sum_row;
- ITensor *_mm_result;
- int32_t _a_offset;
- int32_t _b_offset;
- int32_t _k_offset;
- bool _slide_vector_sum_col;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel.
- *
- * The computation is performed in-place
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
- * and adds to it the offset contribution of matrix A and matrix B in-place.
- *
- * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint for Uint8.
- * The output stage can perform either QuantizeDownInt32ToInt8Scale or QuantizeDownInt32ToInt8ScaleByFixedPoint for Int8.
- *
- * For QuantizeDownInt32ToUint8Scale/QuantizeDownInt32ToInt8Scale the final result is:
- *
- * ((mm_result'[i][k] + result_offset) * result_mult_int) >> result_shift
- *
- * For QuantizeDownInt32ToUint8ScaleByFixedPoint/QuantizeDownInt32ToInt8ScaleByFixedPoint the final result is:
- *
- * (FixedPointMul(mm_result'[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
- *
- * where FixedPointMul(x, y) is the nearest integer to the following
- * mathematical expression, evaluated without overflow or intermediate rounding:
- *
- * (x * y) / 2^31
- *
- * and mm_result'[i][k] = mm_result[i][k] +
- * (vector_sum_col[k] * a_offset) +
- * (vector_sum_row[i] * b_offset) +
- * (a_offset * b_offset * k)
- */
-
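The fixed point arithmetic referenced above follows the gemmlowp reference semantics: a saturating rounding doubling high multiply, then a rounding right shift and the offset. The sketch below is for illustration only; the tie-breaking of the rounding shift is simplified compared to the reference.

#include <cstdint>

// Saturating rounding doubling high multiply: the nearest integer to (x * y) / 2^31,
// evaluated in 64-bit so there is no overflow or intermediate rounding.
inline int32_t saturating_rounding_doubling_highmul(int32_t x, int32_t y)
{
    const bool    overflow = (x == INT32_MIN) && (y == INT32_MIN);
    const int64_t prod     = static_cast<int64_t>(x) * static_cast<int64_t>(y);
    const int64_t nudge    = prod >= 0 ? (1LL << 30) : (1 - (1LL << 30)); // round ties away from zero
    const int32_t result   = static_cast<int32_t>((prod + nudge) >> 31);
    return overflow ? INT32_MAX : result;
}

// Rounding arithmetic shift right (simplified: ties are rounded up here).
inline int32_t rounding_shift_right(int32_t x, int shift)
{
    return shift == 0 ? x : ((x + (1 << (shift - 1))) >> shift);
}

// One element of the fixed point output stage, before the final clamp and cast:
// (FixedPointMul(acc, multiplier) >> shift) + offset_after_shift
inline int32_t quantize_down_fixedpoint(int32_t acc, int32_t multiplier, int shift, int32_t offset_after_shift)
{
    return rounding_shift_right(saturating_rounding_doubling_highmul(acc, multiplier), shift) + offset_after_shift;
}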
-class NEGEMMLowpOffsetContributionOutputStageKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpOffsetContributionOutputStageKernel";
- }
- /** Constructor */
- NEGEMMLowpOffsetContributionOutputStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpOffsetContributionOutputStageKernel(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpOffsetContributionOutputStageKernel &operator=(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpOffsetContributionOutputStageKernel(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpOffsetContributionOutputStageKernel &operator=(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
-     * @param[in]  vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
-     *                            Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
- * @param[out] output Output tensor containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] k Number of matrix A columns or Matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters.
- */
- void configure(const ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, const ITensor *bias, ITensor *output, int32_t k, int32_t a_offset, int32_t b_offset,
- GEMMLowpOutputStageInfo output_stage);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionOutputStageKernel
- *
- * @param[in] mm_result Input tensor info containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
- * @param[in] vector_sum_col Tensor info for the input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Tensor info for the input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
- * @param[in] output Output tensor info containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset,
- int32_t b_offset,
- GEMMLowpOutputStageInfo output_stage);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_vector_sum_col;
- const ITensor *_vector_sum_row;
- const ITensor *_bias;
- const ITensor *_mm_result;
- ITensor *_output;
- int32_t _a_offset;
- int32_t _b_offset;
- int32_t _k_offset;
- bool _slide_vector_sum_col;
- GEMMLowpOutputStageInfo _output_stage;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
- * The following computations will be performed by the kernel:
- *
- * -# Add offset terms to final result
- * -# Multiply each entry of result by result_mult_int
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Shift the int32 accumulator by result_shift
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values:
- *    - to the [0..255] range and cast to QASYMM8.
- *    - to the [-128..127] range and cast to QASYMM8_SIGNED.
- *
- */
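Per element, the scale path listed above amounts to a handful of integer operations; a minimal scalar sketch for the QASYMM8 case, using the formula form shown in the offset-contribution/output-stage header above (helper name and argument defaults are illustrative):

#include <algorithm>
#include <cstdint>

// Scale-based quantize-down of one int32 accumulator to QASYMM8. 'bias' is 0 when no
// bias tensor is given; min/max implement the optional bounded ReLU.
inline uint8_t quantize_down_scale_u8(int32_t acc, int32_t bias, int32_t result_offset,
                                      int32_t result_mult_int, int32_t result_shift,
                                      int32_t min_bound = 0, int32_t max_bound = 255)
{
    int32_t v = ((acc + bias + result_offset) * result_mult_int) >> result_shift;
    v         = std::max(min_bound, std::min(max_bound, v)); // clamp to the requested bounds
    v         = std::max(0, std::min(255, v));               // clamp to the QASYMM8 range
    return static_cast<uint8_t>(v);
}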
-class NEGEMMLowpQuantizeDownInt32ScaleKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpQuantizeDownInt32ScaleKernel";
- }
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ScaleKernel(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ScaleKernel(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
-     * @param[out] output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
-     * @param[in]  output_stage GEMMLowp output stage metadata.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo *output_stage);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ScaleKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
-     * @param[in]  output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
-     * @param[in]  output_stage GEMMLowp output stage metadata.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the NEGEMMLowpQuantizeDownInt32ScaleKernel
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run(const Window &window);
-
- /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ScaleKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ScaleKernel::*)(const Window &window);
-
- QuantizeDownFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- const GEMMLowpOutputStageInfo *_output_stage;
- bool _is_bounded_relu;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16.
- *
- */
-class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel";
- }
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
-     * @param[out] output                       Output tensor. Data type supported: QSYMM16
-     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
- * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor info. Data type supported: S32
- * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input.
-     * @param[in]  output Output tensor info. Data type supported: QSYMM16
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool is_bounded_relu>
- void run(const Window &window);
-
- /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::*)(const Window &window);
-
- QuantizeDownFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- int _result_fixedpoint_multiplier;
- int _result_shift;
- int _min;
- int _max;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Add offset to each result
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED.
- *
- */
-class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel";
- }
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
-     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
-     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
- * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
-     * @param[in]  output Output tensor. Data type supported: QASYMM8_SIGNED
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool is_bounded_relu>
- void run(const Window &window);
-
- /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::*)(const Window &window);
-
- QuantizeDownFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- int _result_fixedpoint_multiplier;
- int _result_shift;
- int _result_offset_after_shift;
- int _min;
- int _max;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
- *
- * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Add offset to each result
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
- *
- */
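After the fixed point multiply and shift (see the sketch after the offset-contribution/output-stage kernel above), the last step for QASYMM8 is an offset, an optional bounded ReLU via @p min / @p max, and a saturating cast. A scalar sketch; how the default min/max of 0 disable the extra clamp in the real kernel is glossed over here:

#include <algorithm>
#include <cstdint>

// Final step for QASYMM8: add the offset, apply the bounded ReLU given by min/max,
// then saturate to [0, 255] and cast. Callers with no activation pass 0 and 255.
inline uint8_t finalize_to_qasymm8(int32_t scaled, int32_t result_offset_after_shift,
                                   int32_t min_bound = 0, int32_t max_bound = 255)
{
    int32_t v = scaled + result_offset_after_shift;
    v         = std::max(min_bound, std::min(max_bound, v)); // bounded ReLU when tighter than [0, 255]
    v         = std::max(0, std::min(255, v));               // saturate to the QASYMM8 range
    return static_cast<uint8_t>(v);
}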
-class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel";
- }
- /** Constructor */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
-     * @param[out] output                       Output tensor. Data type supported: QASYMM8
-     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
- * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
- * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
-     * @param[in]  output Output tensor. Data type supported: QASYMM8
- * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
- * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool is_bounded_relu>
- void run(const Window &window);
-
- /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::*)(const Window &window);
-
- QuantizeDownFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
- int _result_fixedpoint_multiplier;
- int _result_shift;
- int _result_offset_after_shift;
- int _min;
- int _max;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H
-#define ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-struct GEMMLowpReductionKernelInfo;
-
-/** Common interface for all NEON reduction kernels */
-class INEGEMMLowpReductionKernel : public INEKernel
-{
-public:
- /** Constructor */
- INEGEMMLowpReductionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- INEGEMMLowpReductionKernel(const INEGEMMLowpReductionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- INEGEMMLowpReductionKernel &operator=(const INEGEMMLowpReductionKernel &) = delete;
- /** Allow instances of this class to be moved */
- INEGEMMLowpReductionKernel(INEGEMMLowpReductionKernel &&) = default;
- /** Allow instances of this class to be moved */
- INEGEMMLowpReductionKernel &operator=(INEGEMMLowpReductionKernel &&) = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
-
-protected:
- const ITensor *_input;
- ITensor *_output;
- int32_t _k;
- int32_t _scalar;
- bool _mul_by_scalar;
-};
-
-/** NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
- *
- * @note This stage is needed to handle the offset of matrix product
- * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
- */
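The row sums computed by this kernel feed the b_offset term of the offset contribution; a scalar sketch over a flat row-major matrix (the helper name and the optional scaling flag mirror the kernel metadata but are illustrative only):

#include <cstddef>
#include <cstdint>
#include <vector>

// Row sums of an [m x k] quantized matrix A, widened to int32 and optionally scaled.
std::vector<int32_t> row_sums(const std::vector<int8_t> &a, int m, int k,
                              int32_t scalar = 1, bool mul_by_scalar = false)
{
    std::vector<int32_t> sums(m, 0);
    for(int i = 0; i < m; ++i)
    {
        int32_t acc = 0;
        for(int j = 0; j < k; ++j)
        {
            acc += static_cast<int32_t>(a[static_cast<size_t>(i) * k + j]);
        }
        sums[i] = mul_by_scalar ? acc * scalar : acc;
    }
    return sums;
}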
-class NEGEMMLowpMatrixAReductionKernel : public INEGEMMLowpReductionKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpMatrixAReductionKernel";
- }
- /** Initialise the kernel's input and output.
- *
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k (num_mtx_a_cols) Number of matrix A columns
- * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4
- * - scalar Scalar value to multiply each reduced row by.
-     *                            - mul_byscalar       True if each reduced row must be multiplied by a scalar value.
- */
- void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel
- *
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k (num_mtx_a_cols) Number of matrix A columns
- * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4
- * - scalar Scalar value to multiply each reduced row by.
-     *                            - mul_byscalar       True if each reduced row must be multiplied by a scalar value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Execution of the reduction kernel specialized on the input type
- *
- * @param[in] window Execution window
- */
- template <typename T>
- void run_internal(const Window &window);
-};
-
-/** NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
- *
- * @note This stage is needed to handle the offset of matrix product
- * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
- */
-class NEGEMMLowpMatrixBReductionKernel : public INEGEMMLowpReductionKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMLowpMatrixBReductionKernel";
- }
- /** Initialise the kernel's input and output.
- *
-     * @param[in]  mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k (num_mtx_b_rows) Number of matrix B rows.
- * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW.
-     *                            - scalar             Scalar value to multiply each reduced column by.
-     *                            - mul_byscalar       True if each reduced column must be multiplied by a scalar value.
- */
- void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel
- *
-     * @param[in]  mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k (num_mtx_b_rows) Number of matrix B rows.
- * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW.
-     *                            - scalar             Scalar value to multiply each reduced column by.
-     *                            - mul_byscalar       True if each reduced column must be multiplied by a scalar value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Execution of the reduction kernel specialized on the input type
- *
- * @param[in] window Execution window
- * @param[in] info Thread-related information
- */
- template <typename T>
- void run_internal(const Window &window, const ThreadInfo &info);
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H
-#define ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta:
- *
- * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size
- *
- * @note This stage is used to finalize the GEMM result and is computed if and only if beta != 0.0. When this kernel is used to finalize the GEMM result, we have:
- * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel
- * - MTX_1 = C
- */
-class NEGEMMMatrixAdditionKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMMatrixAdditionKernel";
- }
- /** Constructor */
- NEGEMMMatrixAdditionKernel();
- /** Prevent instances of this class from being copied */
- NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete;
- /** Prevent instances of this class from being copied */
- NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note The input and output tensors must have the same dimensions
- *
- * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32
- * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
- * @param[in] beta Weight of matrix C
- */
- void configure(const ITensor *input, ITensor *output, float beta);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel.
- *
- * @note The input and output tensors must have the same dimensions
- *
- * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32
- * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
- * @param[in] beta Weight of matrix C
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the matrix addition functions
- *
- * @param[in] input An input tensor. Data types supported: F16/F32
- * @param[out] output The output tensor. Data type supported: same as @p input
- * @param[in] window Region on which to execute the kernel.
- * @param[in] beta Weight of matrix C
- */
- using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta);
- /** Matrix addition function to use for the particular tensor types passed to configure() */
- MatrixAdditionFunction *_func;
- float _beta;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H */
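Editor's note: the addition stage above updates the output buffer in place, MTX_OUT = MTX_0 + beta * MTX_1, where the output already holds alpha * A * B and the input holds C. A minimal scalar sketch follows; the flat float buffer and the function name are assumptions for illustration only.

```cpp
// In-place update performed by the matrix addition stage:
// output[i] (holding alpha * A * B) is updated to output[i] + beta * input[i] (input holds C).
#include <cstddef>

void matrix_addition_f32(const float *input, float *output, size_t num_elements, float beta)
{
    for(size_t i = 0; i < num_elements; ++i)
    {
        output[i] += beta * input[i];
    }
}
```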
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication
- *
- * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel" and @ref NEGEMMTranspose1xWKernel
- * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
- *
- */
-class NEGEMMMatrixMultiplyKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMMatrixMultiplyKernel";
- }
- /** Constructor */
- NEGEMMMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
- * These two kernels change the layout of the original matrices to be more cache-friendly.
- *
- * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
- * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] alpha Weight of the matrix product
- * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If @p is_interleaved is true, this object must contain the information to understand how matrix A and matrix B have been reshaped
- */
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel
- *
- * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
- * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
- * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
- * @param[in] alpha Weight of the matrix product
- * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If @p is_interleaved is true, this object must contain the information to understand how matrix A and matrix B have been reshaped
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input0;
- const ITensor *_input1;
- ITensor *_output;
- float _alpha;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H*/
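Editor's note: ignoring the interleaved/transposed layouts the kernel actually consumes, the computation is output = alpha * (A x B). A plain scalar reference is sketched below under the assumption of row-major buffers; names are illustrative.

```cpp
// Scalar reference of the matrix multiply stage: out = alpha * (A x B),
// with A of size m x k, B of size k x n, row-major storage assumed.
#include <cstddef>

void gemm_reference_f32(const float *a, const float *b, float *out,
                        size_t m, size_t k, size_t n, float alpha)
{
    for(size_t i = 0; i < m; ++i)
    {
        for(size_t j = 0; j < n; ++j)
        {
            float acc = 0.f;
            for(size_t p = 0; p < k; ++p)
            {
                acc += a[i * k + p] * b[p * n + j];
            }
            out[i * n + j] = alpha * acc;
        }
    }
}
```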
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H
-#define ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
- *
- * The following is an example of how the 1xW transposition works when the input data type is F32
- *
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccccccccccc}
- * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * @f]
- *
- * The following is an example of how the 1xW transposition works when the input data type is F16
- *
- * @f[
- * \left( \begin{array}{cccccccc}
- * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\
- * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\
- * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\
- * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc}
- * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\
- * \end{array} \right)
- * @f]
- *
- * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
- *
- */
-class NEGEMMTranspose1xWKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGEMMTranspose1xWKernel";
- }
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info. Data type supported: same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H */
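Editor's note: the rearrangement documented above writes chunk c of input row i (W contiguous elements) into output row c at column offset i*W, giving the [height * W, ceil(width / W)] shape. A minimal scalar sketch for F32 (W = 4) follows; row-major buffers without padding are an assumption.

```cpp
// Scalar sketch of the 1xW transposition for F32 (W = 16 / sizeof(float) = 4):
// chunk c of input row i lands in output row c at column offset i*W.
#include <cstddef>
#include <vector>

std::vector<float> transpose_1xw_f32(const std::vector<float> &in, size_t width, size_t height)
{
    const size_t W        = 16 / sizeof(float);   // elements per 1xW chunk
    const size_t out_cols = height * W;           // output width
    const size_t out_rows = (width + W - 1) / W;  // output height = ceil(width / W)
    std::vector<float> out(out_cols * out_rows, 0.f);

    for(size_t i = 0; i < height; ++i)
    {
        for(size_t c = 0; c < out_rows; ++c)
        {
            for(size_t e = 0; e < W && (c * W + e) < width; ++e)
            {
                out[c * out_cols + i * W + e] = in[i * width + c * W + e];
            }
        }
    }
    return out;
}
```

With a 4x4 F32 input this produces a single output row containing the four input rows back to back, matching the first example above.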
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEGATHERKERNEL_H
-#define ARM_COMPUTE_NEGATHERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** NEON kernel to perform the gather operation */
-class NEGatherKernel : public INEKernel
-{
-public:
- /** Default constructor. */
- NEGatherKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEGatherKernel(const NEGatherKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEGatherKernel &operator=(const NEGatherKernel &) = delete;
- /** Allow instances of this class to be moved. */
- NEGatherKernel(NEGatherKernel &&) = default;
- /** Allow instances of this class to be moved. */
- NEGatherKernel &operator=(NEGatherKernel &&) = default;
-    /** Default destructor */
- ~NEGatherKernel() = default;
-
- /** Name of the kernel
- *
- * @return Kernel name
- */
- const char *name() const override
- {
- return "NEGatherKernel";
- }
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- */
- void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
- /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel
- *
- * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Implementation of the gather operation for 0 axis.
- *
- * For gather on the 0 axis an element by element copy is performed.
- *
- * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window())
- * @param[in] info Info about executing thread and CPU.
- */
- template <typename U>
- void gather_0_axis(const Window &window, const ThreadInfo &info);
-
- /** Implementation of the gather operation.
- *
- * For axis >= 1, a row-wise copy is performed.
- *
- * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window())
- * @param[in] info Info about executing thread and CPU.
- */
- template <typename U>
- void gather_n_axis(const Window &window, const ThreadInfo &info);
-
- using kernel_ptr = void (NEGatherKernel::*)(const Window &window, const ThreadInfo &info);
-
- const ITensor *_input;
- const ITensor *_indices;
- int _axis;
- ITensor *_output;
- kernel_ptr _func;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEGATHERKERNEL_H */
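Editor's note: a gather copies slices of the input selected by the indices tensor. The sketch below shows the idea for a 2D, row-major array, gathering along the row dimension; function name, layout, and the float type are illustrative assumptions, and axis handling for higher ranks is omitted.

```cpp
// Gather along the row dimension of a 2D, row-major array:
// output row r is a copy of input row indices[r].
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

std::vector<float> gather_rows(const std::vector<float> &in, size_t rows, size_t cols,
                               const std::vector<uint32_t> &indices)
{
    std::vector<float> out(indices.size() * cols);
    for(size_t r = 0; r < indices.size(); ++r)
    {
        assert(indices[r] < rows); // the kernel documents the same constraint on index values
        std::memcpy(out.data() + r * cols, in.data() + indices[r] * cols, cols * sizeof(float));
    }
    return out;
}
```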
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H
-#define ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a Gaussian 3x3 filter */
-class NEGaussian3x3Kernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussian3x3Kernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H */
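Editor's note: a 3x3 Gaussian filter is a weighted average with the classic 1-2-1 kernel normalised by 16. The sketch below evaluates one interior pixel of a U8 image; border handling and the NEON vectorisation are omitted, and the function name and layout are assumptions.

```cpp
// 3x3 Gaussian at one interior pixel of a U8, row-major image:
// weights 1/16 * [1 2 1; 2 4 2; 1 2 1].
#include <cstdint>

uint8_t gaussian3x3_at(const uint8_t *src, int stride, int x, int y)
{
    static const int w[3][3] = { { 1, 2, 1 }, { 2, 4, 2 }, { 1, 2, 1 } };
    int sum = 0;
    for(int dy = -1; dy <= 1; ++dy)
    {
        for(int dx = -1; dx <= 1; ++dx)
        {
            sum += w[dy + 1][dx + 1] * src[(y + dy) * stride + (x + dx)];
        }
    }
    return static_cast<uint8_t>(sum / 16);
}
```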
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H
-#define ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */
-class NEGaussian5x5HorKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussian5x5HorKernel";
- }
- /** Default constructor */
- NEGaussian5x5HorKernel();
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size;
-};
-
-/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */
-class NEGaussian5x5VertKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussian5x5VertKernel";
- }
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data type supported: S16.
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H */
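Editor's note: the two kernels above implement a separable 5x5 Gaussian: a horizontal pass producing an S16 intermediate, then a vertical pass producing U8. The sketch below assumes the usual [1 4 6 4 1] taps with the full 1/256 normalisation applied in the vertical pass; the exact taps and normalisation point are assumptions, and border handling is omitted.

```cpp
// Separable 5x5 Gaussian at one pixel: horizontal pass (U8 -> S16),
// then vertical pass on the intermediate (S16 -> U8) with division by 256.
#include <cstdint>

int16_t gaussian5x5_hor_at(const uint8_t *src, int stride, int x, int y)
{
    static const int taps[5] = { 1, 4, 6, 4, 1 };
    int sum = 0;
    for(int dx = -2; dx <= 2; ++dx)
    {
        sum += taps[dx + 2] * src[y * stride + (x + dx)];
    }
    return static_cast<int16_t>(sum);
}

uint8_t gaussian5x5_vert_at(const int16_t *tmp, int stride, int x, int y)
{
    static const int taps[5] = { 1, 4, 6, 4, 1 };
    int sum = 0;
    for(int dy = -2; dy <= 2; ++dy)
    {
        sum += taps[dy + 2] * tmp[(y + dy) * stride + x];
    }
    return static_cast<uint8_t>(sum / 256);
}
```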
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H
-#define ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a GaussianPyramid (horizontal pass) */
-class NEGaussianPyramidHorKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussianPyramidHorKernel";
- }
- /** Default constructor */
- NEGaussianPyramidHorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default;
- /** Default destructor */
- ~NEGaussianPyramidHorKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Output should have half the input width. Data type supported: S16.
- */
- void configure(const ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- int _l2_load_offset;
-};
-
-/** NEON kernel to perform a GaussianPyramid (vertical pass) */
-class NEGaussianPyramidVertKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEGaussianPyramidVertKernel";
- }
- /** Default constructor */
- NEGaussianPyramidVertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default;
- /** Default destructor */
- ~NEGaussianPyramidVertKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data type supported: S16.
- * @param[out] output Destination tensor. Output should have half the input height. Data type supported: U8.
- */
- void configure(const ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- int _t2_load_offset;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H
-#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for Compute All Anchors kernel */
-class NEComputeAllAnchorsKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEComputeAllAnchorsKernel";
- }
-
- /** Default constructor */
- NEComputeAllAnchorsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEComputeAllAnchorsKernel(const NEComputeAllAnchorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEComputeAllAnchorsKernel &operator=(const NEComputeAllAnchorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEComputeAllAnchorsKernel(NEComputeAllAnchorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEComputeAllAnchorsKernel &operator=(NEComputeAllAnchorsKernel &&) = default;
- /** Default destructor */
- ~NEComputeAllAnchorsKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- */
- void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel
- *
- * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <typename T>
- void internal_run(const Window &window);
-
- const ITensor *_anchors;
- ITensor *_all_anchors;
- ComputeAnchorsInfo _anchors_info;
-};
-} // namespace arm_compute
-#endif // ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H
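Editor's note: "compute all anchors" replicates each of the A base anchors (x1, y1, x2, y2) over every feature-map cell, shifting it by the cell position times the feature-map stride, to produce the (4, H*W*A) output. The sketch below is a scalar illustration; the packed float layout and names are assumptions.

```cpp
// Expand A base anchors over an H x W feature map with a given stride.
#include <cstddef>
#include <vector>

std::vector<float> compute_all_anchors(const std::vector<float> &anchors, // 4 * A values
                                       size_t num_anchors, size_t feat_w, size_t feat_h,
                                       float stride)
{
    std::vector<float> all_anchors(4 * num_anchors * feat_w * feat_h);
    for(size_t y = 0; y < feat_h; ++y)
    {
        for(size_t x = 0; x < feat_w; ++x)
        {
            for(size_t a = 0; a < num_anchors; ++a)
            {
                const size_t out = 4 * ((y * feat_w + x) * num_anchors + a);
                all_anchors[out + 0] = anchors[4 * a + 0] + x * stride; // x1
                all_anchors[out + 1] = anchors[4 * a + 1] + y * stride; // y1
                all_anchors[out + 2] = anchors[4 * a + 2] + x * stride; // x2
                all_anchors[out + 3] = anchors[4 * a + 3] + y * stride; // y2
            }
        }
    }
    return all_anchors;
}
```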
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H
-#define ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H
-
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform HOG Orientation Binning */
-class NEHOGOrientationBinningKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHOGOrientationBinningKernel";
- }
- /** Default constructor */
- NEHOGOrientationBinningKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default;
- /** Default destructor */
- ~NEHOGOrientationBinningKernel() = default;
-
- /** Initialise the kernel's inputs, output and HOG's metadata
- *
- * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
- * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised block normalization functions
- *
- * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor
- * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor
- * @param[out] output_ptr Pointer to the output cell of hog space tensor
- * @param[in] mag_stride Stride of the magnitude tensor
- * @param[in] phase_stride Stride of the phase tensor
- * @param[in] cell_width Width of the cell
- * @param[in] cell_height Height of the cell
- * @param[in] num_bins Number of bins for each cell
- * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index
- */
- using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width,
- size_t cell_height, size_t num_bins, float phase_scale);
- /** Orientation binning function to use for the particular cell width passed to configure() */
- OrientBinFunc *_func;
- const ITensor *_input_magnitude;
- const ITensor *_input_phase;
- ITensor *_output;
- size_t _cell_width;
- size_t _cell_height;
- size_t _num_bins;
- float _phase_scale;
-};
-
-/** NEON kernel to perform HOG block normalization */
-class NEHOGBlockNormalizationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHOGBlockNormalizationKernel";
- }
- /** Default constructor */
- NEHOGBlockNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default;
- /** Default destructor */
- ~NEHOGBlockNormalizationKernel() = default;
-
- /** Initialise the kernel's input, output and HOG's metadata
- *
- * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised block normalization functions
- *
- * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor
- * @param[out] output_ptr Pointer to the output block of the hog normalized space
- * @param[in] input_stride Stride of the input hog space tensor
- * @param[in] num_cells_per_block_height Number of cells per block along the Y direction
- * @param[in] num_bins_block_x Number of bins per block along the X direction
- * @param[in] num_bins_block Number of total bins per block
- * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization
- */
- using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block,
- float l2_hyst_threshold);
- /** Block normalization function to use for the particular normalization type passed to configure() */
- BlockNormFunc *_func;
- const ITensor *_input;
- ITensor *_output;
- Size2D _num_cells_per_block;
- Size2D _num_cells_per_block_stride;
- size_t _num_bins;
- float _l2_hyst_threshold;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H */
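Editor's note: orientation binning maps each pixel's gradient phase to a histogram bin via phase * phase_scale and accumulates the gradient magnitude into that bin of the cell histogram; block normalisation then normalises groups of cell histograms. A simplified cell-binning sketch follows; bilinear interpolation between neighbouring bins is omitted and the names and layout are assumptions.

```cpp
// Build one cell histogram: bin = phase * phase_scale, accumulate magnitude.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<float> bin_cell(const int16_t *mag, const uint8_t *phase, size_t stride,
                            size_t cell_width, size_t cell_height, size_t num_bins, float phase_scale)
{
    std::vector<float> hist(num_bins, 0.f);
    for(size_t y = 0; y < cell_height; ++y)
    {
        for(size_t x = 0; x < cell_width; ++x)
        {
            const size_t bin = static_cast<size_t>(phase[y * stride + x] * phase_scale) % num_bins;
            hist[bin] += static_cast<float>(mag[y * stride + x]);
        }
    }
    return hist;
}
```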
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHOGDETECTORKERNEL_H
-#define ARM_COMPUTE_NEHOGDETECTORKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform HOG detector kernel using linear SVM */
-class NEHOGDetectorKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHOGDetectorKernel";
- }
- /** Default constructor */
- NEHOGDetectorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHOGDetectorKernel(NEHOGDetectorKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = delete;
- /** Default destructor */
- ~NEHOGDetectorKernel() = default;
-
- /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
- *
- * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel
- * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be a multiple of hog->info()->block_stride()
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- IDetectionWindowArray *_detection_windows;
- const float *_hog_descriptor;
- float _bias;
- float _threshold;
- uint16_t _idx_class;
- size_t _num_bins_per_descriptor_x;
- size_t _num_blocks_per_descriptor_y;
- size_t _block_stride_width;
- size_t _block_stride_height;
- size_t _detection_window_width;
- size_t _detection_window_height;
- size_t _max_num_detection_windows;
- arm_compute::Mutex _mutex;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHOGDETECTORKERNEL_H */
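Editor's note: the detector scores each candidate window as a dot product between the window's block-normalised HOG descriptor and the linear-SVM weights, plus a bias; windows whose score exceeds the threshold are pushed to the detection array. The sketch below shows only the scoring step; names and the flat descriptor layout are assumptions.

```cpp
// Linear-SVM scoring of one detection window.
#include <cstddef>

bool hog_window_passes(const float *descriptor, const float *svm_weights, size_t descriptor_size,
                       float bias, float threshold)
{
    float score = bias;
    for(size_t i = 0; i < descriptor_size; ++i)
    {
        score += descriptor[i] * svm_weights[i];
    }
    return score > threshold; // above threshold -> emit a DetectionWindow
}
```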
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHARRISCORNERSKERNEL_H
-#define ARM_COMPUTE_NEHARRISCORNERSKERNEL_H
-
-#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
-#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Common interface for all Harris Score kernels */
-class INEHarrisScoreKernel : public INEKernel
-{
-public:
- /** Default constructor */
- INEHarrisScoreKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete;
- /** Allow instances of this class to be moved */
- INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default;
- /** Allow instances of this class to be moved */
- INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default;
- /** Default destructor */
- ~INEHarrisScoreKernel() = default;
-
-public:
- /** Setup the kernel parameters
- *
- * @param[in] input1 Source image (gradient X). Data types supported: S16/S32
- * @param[in] input2 Source image (gradient Y). Data types supported: same as @p input1
- * @param[out] output Destination image (harris score). Data types supported: F32
- * @param[in] norm_factor Normalization factor to use according to the gradient size (must be different from 0)
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0;
-
-protected:
- const IImage *_input1; /**< Source image - Gx component */
- const IImage *_input2; /**< Source image - Gy component */
- IImage *_output; /**< Source image - Harris score */
- float _sensitivity; /**< Sensitivity value */
- float _strength_thresh; /**< Threshold value */
- float _norm_factor; /**< Normalization factor */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Template NEON kernel to perform Harris Score.
- * The implementation supports 3, 5, and 7 for the block_size
- */
-template <int32_t block_size>
-class NEHarrisScoreKernel : public INEHarrisScoreKernel
-{
-public:
- const char *name() const override
- {
- return "NEHarrisScoreKernel";
- }
- /** Default constructor */
- NEHarrisScoreKernel();
- // Inherited methods overridden:
- void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override;
- BorderSize border_size() const override;
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised harris score functions */
- using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
- float norm_factor, float sensitivity, float strength_thresh);
- /** Harris Score function to use for the particular image types passed to configure() */
- HarrisScoreFunction *_func;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHARRISCORNERSKERNEL_H */
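Editor's note: the Harris score at a pixel accumulates the structure tensor of the normalised gradients over a block_size x block_size neighbourhood and evaluates det(M) - k * trace(M)^2, discarding values below the strength threshold. A scalar sketch follows; border handling and the block-size-specialised NEON paths are omitted, and the names and int16 gradient layout are assumptions.

```cpp
// Harris score at one pixel from gradient images gx, gy (S16, row-major).
#include <cstdint>

float harris_score_at(const int16_t *gx, const int16_t *gy, int stride, int x, int y,
                      int block_size, float norm_factor, float sensitivity, float strength_thresh)
{
    const int half = block_size / 2;
    float     gx2 = 0.f, gy2 = 0.f, gxgy = 0.f;
    for(int dy = -half; dy <= half; ++dy)
    {
        for(int dx = -half; dx <= half; ++dx)
        {
            const float ix = gx[(y + dy) * stride + (x + dx)] * norm_factor;
            const float iy = gy[(y + dy) * stride + (x + dx)] * norm_factor;
            gx2 += ix * ix;
            gy2 += iy * iy;
            gxgy += ix * iy;
        }
    }
    const float det   = gx2 * gy2 - gxgy * gxgy;
    const float trace = gx2 + gy2;
    const float score = det - sensitivity * trace * trace;
    return (score > strength_thresh) ? score : 0.f;
}
```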
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the height concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEHeightConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHeightConcatenateLayerKernel";
- }
- /** Default constructor */
- NEHeightConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEHeightConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
- *
- */
- void configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- unsigned int _height_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */
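Editor's note: height concatenation simply copies one input tensor into the output starting at the given row offset along the Y axis. The sketch below assumes a 2D, row-major float layout for illustration; the kernel itself is type-generic and stride-aware.

```cpp
// Copy one input plane into the output at row offset height_offset (Y axis concat).
#include <cstddef>
#include <cstring>

void concat_along_height(const float *input, size_t in_rows, size_t cols,
                         float *output, size_t height_offset)
{
    for(size_t r = 0; r < in_rows; ++r)
    {
        std::memcpy(output + (height_offset + r) * cols, input + r * cols, cols * sizeof(float));
    }
}
```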
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEHISTOGRAMKERNEL_H
-#define ARM_COMPUTE_NEHISTOGRAMKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-#include <cstddef>
-#include <cstdint>
-
-namespace arm_compute
-{
-class IDistribution1D;
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the histogram kernel */
-class NEHistogramKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEHistogramKernel";
- }
- /** Default constructor */
- NEHistogramKernel();
- /** Default destructor */
- ~NEHistogramKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHistogramKernel(const NEHistogramKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEHistogramKernel &operator=(const NEHistogramKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHistogramKernel(NEHistogramKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEHistogramKernel &operator=(NEHistogramKernel &&) = delete;
-
- /** Set the input image and the distribution output.
- *
- * @param[in] input Source image. Data type supported: U8.
- * @param[out] output Destination distribution.
- * @param[in,out] local_hist Array that the threads use to save their local histograms.
- * Its size should be equal to (number_of_threads * num_bins),
- * and Window::thread_id() is used to determine the part of the array
- * used by each thread.
- * @param[out] window_lut LUT with pre-calculated possible window values.
- * The size of the LUT should be equal to max_range_size and it will be filled
- * during the configure stage. It is re-used in every run and can therefore be
- * safely shared among threads.
- */
- void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut);
- /** Set the input image and the distribution output.
- *
- * @note Used for histogram of fixed size equal to 256
- *
- * @param[in] input Source image. Data type supported: U8.
- * @param[out] output Destination distribution, which must have 256 bins.
- */
- void configure(const IImage *input, IDistribution1D *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Function to merge multiple partial histograms.
- *
- * @param[out] global_hist Pointer to the final histogram.
- * @param[in] local_hist Pointer to the partial histograms.
- * @param[in] bins Number of bins.
- */
- void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins);
- /** Function to merge multiple minimum values of partial histograms.
- *
- * @param[out] global_min Pointer to the global min value.
- * @param[in] local_min Local min value.
- */
- void merge_min(uint8_t *global_min, const uint8_t &local_min);
- /** Function to perform histogram on the given window
- *
- * @param[in] win Region on which to execute the kernel
- * @param[in] info Info about the executing thread
- */
- void histogram_U8(Window win, const ThreadInfo &info);
- /** Function to perform histogram on the given window where histogram is
- * of fixed size 256 without ranges and offsets.
- *
- * @param[in] win Region on which to execute the kernel
- * @param[in] info Info about the executing thread
- */
- void histogram_fixed_U8(Window win, const ThreadInfo &info);
- /** Pre-calculate the pixel windowing for every possible pixel
- *
- * Calculate (V - offset) * numBins / range where V is every possible pixel value.
- *
- * @note We currently support U8 image thus possible pixel values are between 0 and 255
- */
- void calculate_window_lut() const;
- /** Common signature for all the specialised Histogram functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info);
-
- HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure()
- const IImage *_input;
- IDistribution1D *_output;
- uint32_t *_local_hist;
- uint32_t *_window_lut;
- arm_compute::Mutex _hist_mtx;
- static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEHISTOGRAMKERNEL_H */
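
The configure() documentation above pins down the two pieces of shared state: a window LUT that maps every possible U8 pixel value to its bin via (V - offset) * num_bins / range, and per-thread partial histograms that are merged into the global distribution once each thread has finished. A minimal standalone sketch of those two steps, assuming out-of-range values are clamped to the edge bins (illustrative C++ only, not the library implementation):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Pre-compute the bin index for every possible U8 value: (V - offset) * num_bins / range.
// Out-of-range values are clamped to the first/last bin here; this clamping is an assumption.
std::vector<uint32_t> make_window_lut(uint32_t num_bins, uint32_t offset, uint32_t range)
{
    std::vector<uint32_t> lut(256);
    for(uint32_t v = 0; v < 256; ++v)
    {
        const int64_t bin = (static_cast<int64_t>(v) - offset) * num_bins / range;
        lut[v] = static_cast<uint32_t>(std::min<int64_t>(std::max<int64_t>(bin, 0), num_bins - 1));
    }
    return lut;
}

// Merge one thread's partial histogram into the global histogram.
void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins)
{
    for(size_t i = 0; i < bins; ++i)
    {
        global_hist[i] += local_hist[i];
    }
}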
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEIM2COLKERNEL_H
-#define ARM_COMPUTE_NEIM2COLKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-class Size2D;
-
-/** Interface for the im2col reshape kernel.
- *
- * Rearranges image blocks into columns: each convolution block is stripped out into a single column,
- * so that a convolution can be expressed as a plain matrix multiplication.
- *
- * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have:
- *
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccc}
- * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
- * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
- * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
- * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
- * \end{array} \right)
- * @f]
- */
-class NEIm2ColKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEIm2ColKernel";
- }
- /** Default constructor */
- NEIm2ColKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEIm2ColKernel(const NEIm2ColKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEIm2ColKernel(NEIm2ColKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default;
- /** Default destructor */
- ~NEIm2ColKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in]  has_bias    If biases are provided, the matrix is expanded with an extra column of 1s.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
- * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in]  has_bias    If biases are provided, the matrix is expanded with an extra column of 1s.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
- bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run im2col
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T, bool has_pads, bool is_nchw>
- void run_im2col(const Window &window);
-
- /** Common signature for all the specialised im2col functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window);
-
- Im2ColFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- std::pair<unsigned int, unsigned int> _convolved_dims;
- PadStrideInfo _conv_info;
- unsigned int _kernel_width;
- unsigned int _kernel_height;
- bool _has_bias;
- Size2D _dilation;
- DataLayout _data_layout;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEIM2COLKERNEL_H */
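
The 4x4 to 4x9 example above corresponds to 3x3 blocks with a stride of 1 and no padding. A minimal single-channel sketch of that rearrangement, assuming row-major storage and no bias column (illustrative only, not the kernel's vectorised NCHW/NHWC implementation):

#include <cstddef>
#include <vector>

// Each output row holds one kernel_h x kernel_w patch of the input, so a convolution
// can afterwards be expressed as a plain matrix multiplication with the reshaped weights.
std::vector<float> im2col_single_channel(const std::vector<float> &src, size_t width, size_t height,
                                         size_t kernel_w, size_t kernel_h, size_t stride)
{
    const size_t out_w = (width - kernel_w) / stride + 1;
    const size_t out_h = (height - kernel_h) / stride + 1;
    std::vector<float> dst(out_w * out_h * kernel_w * kernel_h);

    size_t row = 0;
    for(size_t y = 0; y + kernel_h <= height; y += stride)
    {
        for(size_t x = 0; x + kernel_w <= width; x += stride, ++row)
        {
            for(size_t ky = 0; ky < kernel_h; ++ky)
            {
                for(size_t kx = 0; kx < kernel_w; ++kx)
                {
                    dst[row * kernel_w * kernel_h + ky * kernel_w + kx] = src[(y + ky) * width + (x + kx)];
                }
            }
        }
    }
    return dst;
}

With a 4x4 input, kernel_w = kernel_h = 3 and stride = 1 this produces exactly the 4x9 matrix shown in the Doxygen comment above.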
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-struct InstanceNormalizationLayerKernelInfo;
-
-/** Interface for performing an instance normalization */
-class NEInstanceNormalizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEInstanceNormalizationLayerKernel";
- }
- /** Default constructor */
- NEInstanceNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEInstanceNormalizationLayerKernel(const NEInstanceNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEInstanceNormalizationLayerKernel &operator=(const NEInstanceNormalizationLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEInstanceNormalizationLayerKernel(NEInstanceNormalizationLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEInstanceNormalizationLayerKernel &operator=(NEInstanceNormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~NEInstanceNormalizationLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW
- * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] info Kernel meta-data descriptor
- */
- void configure(ITensor *input, ITensor *output, const InstanceNormalizationLayerKernelInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayer.
- *
- * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
- * @param[in] info Kernel meta-data descriptor
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialized instance normalization functions
- *
- * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
- * @param[out] output The output tensor.
- * @param[in] gamma The scale scalar value applied to the normalized tensor. Defaults to 1.0
- * @param[in] beta The offset scalar value applied to the normalized tensor. Defaults to 0.0
- * @param[in] epsilon Lower bound value for the normalization. Defaults to 1e-12
- */
- using NormalizationFunction = void(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window);
-
- NormalizationFunction *_func;
- ITensor *_input;
- ITensor *_output;
- float _gamma;
- float _beta;
- float _epsilon;
- bool _use_mixed_precision{ true };
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */
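
Instance normalization computes a mean and variance per plane (per channel and per batch element) and applies y = gamma * (x - mean) / sqrt(var + epsilon) + beta, with gamma, beta and epsilon carried by the kernel info descriptor above. A scalar sketch for a single non-empty plane (illustrative only):

#include <cmath>
#include <cstddef>
#include <vector>

// Normalize one H x W plane in place using its own mean and variance.
void instance_normalize_plane(std::vector<float> &plane, float gamma, float beta, float epsilon)
{
    const float n = static_cast<float>(plane.size());
    float sum    = 0.f;
    float sum_sq = 0.f;
    for(float v : plane)
    {
        sum += v;
        sum_sq += v * v;
    }
    const float mean       = sum / n;
    const float var        = sum_sq / n - mean * mean;
    const float inv_stddev = 1.f / std::sqrt(var + epsilon);
    for(float &v : plane)
    {
        v = gamma * (v - mean) * inv_stddev + beta;
    }
}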
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H
-#define ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Kernel to compute the integral image of an image */
-class NEIntegralImageKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEIntegralImageKernel";
- }
-    /** Set the source and destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U32
- */
- void configure(const ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
- bool is_parallelisable() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H */
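
An integral image stores at every position the sum of all input pixels above and to the left of it (inclusive), which is why a U8 input produces a U32 output. A plain sketch using the standard recurrence I(x,y) = in(x,y) + I(x-1,y) + I(x,y-1) - I(x-1,y-1) (illustrative only):

#include <cstddef>
#include <cstdint>
#include <vector>

// Row-major integral image of a width x height U8 image.
std::vector<uint32_t> integral_image(const std::vector<uint8_t> &src, size_t width, size_t height)
{
    std::vector<uint32_t> dst(width * height, 0);
    for(size_t y = 0; y < height; ++y)
    {
        for(size_t x = 0; x < width; ++x)
        {
            const uint32_t up   = (y > 0) ? dst[(y - 1) * width + x] : 0;
            const uint32_t left = (x > 0) ? dst[y * width + x - 1] : 0;
            const uint32_t diag = (x > 0 && y > 0) ? dst[(y - 1) * width + x - 1] : 0;
            dst[y * width + x] = src[y * width + x] + up + left - diag;
        }
    }
    return dst;
}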
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H
-#define ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */
-class NEL2NormalizeLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEL2NormalizeLayerKernel";
- }
- /** Default constructor */
- NEL2NormalizeLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default;
- /** Default destructor */
- ~NEL2NormalizeLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32.
- * @param[in] sum Sum values tensor. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
- * @param[in] epsilon Lower bound value for the normalization.
- */
- void configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel.
- *
- * @param[in] input Source tensor info. Data types supported: F16/F32.
- * @param[in] sum Sum values tensor info. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
- * @param[in] epsilon Lower bound value for the normalization.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- const ITensor *_sum;
- ITensor *_output;
- unsigned int _actual_axis;
- float _epsilon;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H */
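
The kernel consumes a pre-computed sum-of-squares tensor and scales the input by the reciprocal square root of that sum, with epsilon acting as the lower bound of the normalization. A scalar sketch for one row along the reduced axis, assuming the lower bound is applied as max(sum, epsilon) (illustrative only):

#include <algorithm>
#include <cmath>
#include <cstddef>

// out[i] = in[i] / sqrt(max(sum_of_squares, epsilon)) for one row of length len.
void l2_normalize_row(const float *in, float *out, size_t len, float sum_of_squares, float epsilon)
{
    const float norm = 1.f / std::sqrt(std::max(sum_of_squares, epsilon));
    for(size_t i = 0; i < len; ++i)
    {
        out[i] = in[i] * norm;
    }
}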
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_LKTRACKERKERNEL_H
-#define ARM_COMPUTE_LKTRACKERKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <tuple>
-#include <utility>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Internal keypoint class for Lucas-Kanade Optical Flow */
-struct NELKInternalKeypoint
-{
- float x{ 0.f }; /**< x coordinate of the keypoint */
- float y{ 0.f }; /**< y coordinate of the keypoint */
- bool tracking_status{ false }; /**< the tracking status of the keypoint */
-};
-
-/** Interface for NEON Array of Internal Key Points. */
-using INELKInternalKeypointArray = IArray<NELKInternalKeypoint>;
-
-/** Interface for the Lucas-Kanade tracker kernel */
-class NELKTrackerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NELKTrackerKernel";
- }
- /** Default constructor */
- NELKTrackerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELKTrackerKernel(const NELKTrackerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NELKTrackerKernel(NELKTrackerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default;
- /** Default destructor */
- ~NELKTrackerKernel() = default;
-
- /** Initialise the kernel input and output
- *
- * @param[in] input_old Pointer to the input old tensor. Data type supported: U8
- * @param[in]  input_new            Pointer to the input new tensor. Data type supported: U8
- * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data type supported: S16
- * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data type supported: S16
- * @param[in] old_points Pointer to the IKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points
- * @param[out] new_points Pointer to the IKeyPointArray storing new key points
- * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points
- * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in]      num_iterations       The maximum number of iterations before terminating the algorithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- * @param[in] num_levels The number of pyramid levels
- * @param[in] pyramid_scale Scale factor used for generating the pyramid
- */
- void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy,
- const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points,
- INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal,
- Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension,
- size_t level, size_t num_levels, float pyramid_scale);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
-    /** Initialise the array of keypoints in the provided range
- *
- * @param[in] start Index of first element in the keypoints array to be initialised
- * @param[in] end   Index after the last element in the keypoints array to be initialised
- */
- void init_keypoints(int start, int end);
- /** Compute the structure tensor A^T * A based on the scharr gradients I_x and I_y
- *
- * @param[in] keypoint Keypoint for which gradients are computed
- * @param[out] bilinear_ix Intermediate interpolated data for X gradient
- * @param[out] bilinear_iy Intermediate interpolated data for Y gradient
- *
- * @return Values A11, A12, A22
- */
- std::tuple<int, int, int> compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy);
- /** Compute the vector A^T * b, i.e. -sum(I_d * I_t) for d in {x,y}
- *
- * @param[in] old_keypoint Old keypoint for which gradient is computed
- * @param[in] new_keypoint New keypoint for which gradient is computed
- * @param[in] bilinear_ix Intermediate interpolated data for X gradient
- * @param[in] bilinear_iy Intermediate interpolated data for Y gradient
- *
- * @return Values b1, b2
- */
- std::pair<int, int> compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy);
-
- const ITensor *_input_old;
- const ITensor *_input_new;
- const ITensor *_old_scharr_gx;
- const ITensor *_old_scharr_gy;
- IKeyPointArray *_new_points;
- const IKeyPointArray *_new_points_estimates;
- const IKeyPointArray *_old_points;
- INELKInternalKeypointArray *_old_points_internal;
- INELKInternalKeypointArray *_new_points_internal;
- Termination _termination;
- bool _use_initial_estimate;
- float _pyramid_scale;
- float _epsilon;
- unsigned int _num_iterations;
- int _window_dimension;
- unsigned int _level;
- unsigned int _num_levels;
- ValidRegion _valid_region;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NELKTRACKERKERNEL_H */
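
compute_spatial_gradient_matrix() and compute_image_mismatch_vector() above supply the two halves of the classic Lucas-Kanade 2x2 linear system A^T*A * d = A^T*b, which is then solved for the displacement of each keypoint. A scalar sketch of one update step over a window of n samples, assuming ix/iy are the spatial gradients and it is the temporal difference between old and new image (illustrative only):

#include <cstddef>

// Returns false when the structure tensor is (near) singular and the keypoint should be
// marked as lost; otherwise writes the displacement (dx, dy) for this iteration.
bool lk_step(const float *ix, const float *iy, const float *it, size_t n, float &dx, float &dy)
{
    float a11 = 0.f, a12 = 0.f, a22 = 0.f, b1 = 0.f, b2 = 0.f;
    for(size_t i = 0; i < n; ++i)
    {
        a11 += ix[i] * ix[i]; // structure tensor A^T*A
        a12 += ix[i] * iy[i];
        a22 += iy[i] * iy[i];
        b1 -= ix[i] * it[i];  // mismatch vector A^T*b
        b2 -= iy[i] * it[i];
    }
    const float det = a11 * a22 - a12 * a12;
    if(det < 1e-7f)
    {
        return false;
    }
    dx = (a22 * b1 - a12 * b2) / det; // Cramer's rule for the 2x2 system
    dy = (a11 * b2 - a12 * b1) / det;
    return true;
}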
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to multiply each row of the first tensor with the low 2 dimensions of the second tensor. */
-class NELocallyConnectedMatrixMultiplyKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NELocallyConnectedMatrixMultiplyKernel";
- }
- /** Default constructor */
- NELocallyConnectedMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input and output
- *
- * @param[in] input0 First input tensor. Data types supported: F16, F32
- * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- */
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedMatrixMultiplyKernel
- *
- * @param[in] input0 First input tensor info. Data types supported: F16, F32
- * @param[in] input1 Second input tensor info. Data type supported: same as @p input0
- * @param[in] output Output tensor info. Data type supported: same as @p input0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input0;
- const ITensor *_input1;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */
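
In a locally connected layer every output position owns its own weight matrix, so row i of the first tensor is multiplied with the i-th 2D slice of the second tensor rather than with a single shared matrix. A naive sketch of that semantics, assuming row-major storage and one K x N weight slice per row (illustrative only, not the kernel's actual memory layout):

#include <cstddef>

// out[i][n] = sum_k in0[i][k] * in1[i][k][n]
void locally_connected_matrix_multiply(const float *in0, const float *in1, float *out,
                                       size_t rows, size_t K, size_t N)
{
    for(size_t i = 0; i < rows; ++i)
    {
        for(size_t n = 0; n < N; ++n)
        {
            float acc = 0.f;
            for(size_t k = 0; k < K; ++k)
            {
                acc += in0[i * K + k] * in1[(i * K + k) * N + n];
            }
            out[i * N + n] = acc;
        }
    }
}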
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H
-#define ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Template interface for the kernel to compute magnitude and phase */
-template <MagnitudeType mag_type, PhaseType phase_type>
-class NEMagnitudePhaseKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMagnitudePhaseKernel";
- }
- /** Default constructor */
- NEMagnitudePhaseKernel();
- /** Destructor */
- ~NEMagnitudePhaseKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete;
- /** Default move constructor */
- NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete;
- /** Default move assignment operator */
- NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default;
-
- /** Initialise the kernel's input, output.
- *
- * @note At least one of magnitude or phase must be set
- *
- * @param[in] gx Gradient X tensor. Data type supported: S16.
- * @param[in] gy Gradient Y tensor. Data type supported: S16.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16.
- * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8.
- */
- void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Function to perform magnitude on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void magnitude(const Window &window);
- /** Function to perform phase on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void phase(const Window &window);
- /** Function to perform magnitude and phase on the given window
- *
- * @param[in] window Region on which to execute the kernel
- */
- void magnitude_phase(const Window &window);
-
-private:
- /** Common signature for all the specialised MagnitudePhase functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window);
- /** MagnitudePhase function to use for the particular formats passed to configure() */
- MagnitudePhaseFunctionPtr _func;
- const ITensor *_gx; /**< Input gradient X */
- const ITensor *_gy; /**< Input gradient Y */
- ITensor *_magnitude; /**< Output - Magnitude */
- ITensor *_phase; /**< Output - Phase */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H */
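
For a gradient pair (gx, gy) the magnitude is a norm of the vector and the phase is its angle quantised to U8. A scalar sketch of the L2-norm magnitude with an unsigned phase, assuming the angle in [0, 360) degrees is scaled onto [0, 255] and the magnitude is saturated to S16 (the exact rounding and scaling used by the kernel are not shown here):

#include <algorithm>
#include <cmath>
#include <cstdint>

void magnitude_phase_l2_unsigned(int16_t gx, int16_t gy, int16_t &magnitude, uint8_t &phase)
{
    const float fgx = static_cast<float>(gx);
    const float fgy = static_cast<float>(gy);

    // L2-norm magnitude, saturated to the S16 output type.
    magnitude = static_cast<int16_t>(std::min<long>(std::lround(std::sqrt(fgx * fgx + fgy * fgy)), 32767L));

    // Angle in [0, 360) degrees, mapped onto the U8 output range (assumed mapping).
    float angle = std::atan2(fgy, fgx) * 180.f / 3.14159265f;
    if(angle < 0.f)
    {
        angle += 360.f;
    }
    phase = static_cast<uint8_t>(std::lround(angle * 255.f / 360.f));
}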
+++ /dev/null
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the pooling layer kernel */
-class NEMaxUnpoolingLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMaxUnpoolingLayerKernel";
- }
- /** Default constructor */
- NEMaxUnpoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMaxUnpoolingLayerKernel(const NEMaxUnpoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMaxUnpoolingLayerKernel &operator=(const NEMaxUnpoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEMaxUnpoolingLayerKernel(NEMaxUnpoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEMaxUnpoolingLayerKernel &operator=(NEMaxUnpoolingLayerKernel &&) = default;
- /** Default destructor */
- ~NEMaxUnpoolingLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @note Output shape must be equal to the shape of the original input to pool.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] indices Tensor containing the offset to store the input elements in the output tensor.
- * @ref NEPoolingLayerKernel with indices should precede this function in order to
- * properly reconstruct the output tensor.
- *                       The shape of this tensor has to be equal to the input tensor shape. Data type supported: U32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- */
- void configure(const ITensor *input, const ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEMaxUnpoolingLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] indices Tensor info of the indices of the maximal values. Data type supported: U32.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    /** Function to perform 2x2 max unpooling on the given window, using the indices computed by the preceding pooling kernel.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- */
- template <typename T>
- void unpooling2(const Window &window_input);
-
- using UnpoolingFunction = void (NEMaxUnpoolingLayerKernel::*)(const Window &window);
-
-private:
- UnpoolingFunction _func;
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_indices;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H */
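
Max unpooling is effectively a scatter: the output, shaped like the tensor before pooling, is zero-filled and each pooled value is written back to the offset recorded in the indices tensor produced by the preceding pooling kernel. A flattened sketch of that operation (illustrative only):

#include <cstddef>
#include <cstdint>
#include <vector>

// indices[i] is the flattened offset in the unpooled output where pooled[i] came from.
std::vector<float> max_unpool(const std::vector<float> &pooled, const std::vector<uint32_t> &indices,
                              size_t unpooled_size)
{
    std::vector<float> out(unpooled_size, 0.f);
    for(size_t i = 0; i < pooled.size(); ++i)
    {
        out[indices[i]] = pooled[i];
    }
    return out;
}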
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEANSTDDEVKERNEL_H
-#define ARM_COMPUTE_NEMEANSTDDEVKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
-class NEMeanStdDevKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMeanStdDevKernel";
- }
- /** Default constructor */
- NEMeanStdDevKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMeanStdDevKernel(NEMeanStdDevKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = delete;
- /** Default destructor */
- ~NEMeanStdDevKernel() = default;
-
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Input image. Data type supported: U8.
- * @param[out] mean               Average pixel value of the input image.
- * @param[out] global_sum Keeps global sum of pixel values.
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
- */
- void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- BorderSize border_size() const override;
-
-private:
- const IImage *_input;
- float *_mean;
- float *_stddev;
- uint64_t *_global_sum;
- uint64_t *_global_sum_squared;
- arm_compute::Mutex _mtx;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEMEANSTDDEVKERNEL_H */
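
Once every thread has added its partial results to global_sum and (optionally) global_sum_squared under the mutex, the mean and standard deviation follow directly from the accumulators. A sketch of that final step, assuming num_pixels is the total number of processed pixels (illustrative only):

#include <cmath>
#include <cstddef>
#include <cstdint>

void finalize_mean_stddev(uint64_t global_sum, uint64_t global_sum_squared, size_t num_pixels,
                          float &mean, float &stddev)
{
    const float n = static_cast<float>(num_pixels);
    mean = static_cast<float>(global_sum) / n;
    // Var(X) = E[X^2] - E[X]^2
    const float mean_of_squares = static_cast<float>(global_sum_squared) / n;
    stddev = std::sqrt(mean_of_squares - mean * mean);
}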
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_fp16.h>
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */
-class NEMeanStdDevNormalizationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMeanStdDevNormalizationKernel";
- }
- /** Default constructor */
- NEMeanStdDevNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDevNormalizationKernel(const NEMeanStdDevNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMeanStdDevNormalizationKernel &operator=(const NEMeanStdDevNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEMeanStdDevNormalizationKernel(NEMeanStdDevNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEMeanStdDevNormalizationKernel &operator=(NEMeanStdDevNormalizationKernel &&) = default;
- /** Default destructor */
- ~NEMeanStdDevNormalizationKernel() = default;
- /** Initialise the kernel's input and outputs.
- *
- * @note If the output tensor is a nullptr, the normalization will be performed in-place.
- *
- * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- */
- void configure(ITensor *input, ITensor *output = nullptr, float epsilon = 1e-8f);
- /** Static function to check if given info will lead to a valid configuration of @ref NEMeanStdDevNormalizationKernel
- *
- * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Normalizes the input with respect to mean and standard deviation.
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename ScalarType, int size>
- void mean_stddev_normalization(const Window &window);
-
- ITensor *_input;
- ITensor *_output;
- float _epsilon;
-
- using MeanStdDevNormFunction = void (NEMeanStdDevNormalizationKernel::*)(const Window &window);
-
- MeanStdDevNormFunction _func;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H */
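
The kernel normalizes each row of the 2D input with respect to the mean and standard deviation of that same row, with epsilon guarding against a zero standard deviation. A scalar sketch for one row, normalized in place (illustrative only):

#include <cmath>
#include <cstddef>

void mean_stddev_normalize_row(float *row, size_t len, float epsilon)
{
    const float n = static_cast<float>(len);
    float sum = 0.f, sum_sq = 0.f;
    for(size_t i = 0; i < len; ++i)
    {
        sum += row[i];
        sum_sq += row[i] * row[i];
    }
    const float mean = sum / n;
    const float var  = sum_sq / n - mean * mean;
    const float inv  = 1.f / std::sqrt(var + epsilon);
    for(size_t i = 0; i < len; ++i)
    {
        row[i] = (row[i] - mean) * inv;
    }
}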
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEDIAN3x3KERNEL_H
-#define ARM_COMPUTE_NEMEDIAN3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Kernel to perform a median filter on a tensor */
-class NEMedian3x3Kernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEMedian3x3Kernel";
- }
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEMEDIAN3x3KERNEL_H */
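
A 3x3 median filter replaces every pixel with the median of its 3x3 neighbourhood, which is why the kernel reports a non-zero border size. A scalar sketch for one interior pixel, assuming the caller stays at least one pixel away from every edge (illustrative only):

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>

uint8_t median3x3_at(const uint8_t *src, size_t stride, size_t x, size_t y)
{
    std::array<uint8_t, 9> v;
    size_t n = 0;
    const uint8_t *top_left = src + (y - 1) * stride + (x - 1);
    for(size_t row = 0; row < 3; ++row)
    {
        for(size_t col = 0; col < 3; ++col)
        {
            v[n++] = top_left[row * stride + col];
        }
    }
    // The 5th smallest of 9 values is the median.
    std::nth_element(v.begin(), v.begin() + 4, v.end());
    return v[4];
}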
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMEMSETKERNEL_H
-#define ARM_COMPUTE_NEMEMSETKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for filling the planes of a tensor */
-class NEMemsetKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMemsetKernel";
- }
- /** Default constructor */
- NEMemsetKernel();
- /** Default destructor */
- ~NEMemsetKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMemsetKernel(const NEMemsetKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMemsetKernel &operator=(const NEMemsetKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEMemsetKernel(NEMemsetKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEMemsetKernel &operator=(NEMemsetKernel &&) = default;
- /** Initialise the kernel's tensor and filling value
- *
- * @param[in,out] tensor Input tensor to fill. Supported data types: All
- * @param[in] constant_value The value used to fill the planes of the tensor
- */
- void configure(ITensor *tensor, const PixelValue &constant_value);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- ITensor *_tensor;
- PixelValue _constant_value;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEMEMSETKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEMINMAXLAYERKERNEL_H
-#define ARM_COMPUTE_NEMINMAXLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform min max search on a 3D tensor. */
-class NEMinMaxLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMinMaxLayerKernel";
- }
- /** Default constructor */
- NEMinMaxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = delete;
- /** Default destructor */
- ~NEMinMaxLayerKernel() = default;
-
- /** Initialise the kernel's input and outputs.
- *
- * @note output[0] = minimum
- * @note output[1] = maximum
- *
- * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32.
- * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: F32.
- * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
- /** Resets global minimum and maximum. */
- void reset();
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- void update_min_max(float *out_ptr, float min, float max);
- const ITensor *_input;
- ITensor *_output;
- arm_compute::Mutex _mtx;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEMINMAXLAYERKERNEL_H */
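
The output layout documented above stores, for every batch element, the minimum followed by the maximum of the corresponding 3D volume. A sketch of that reduction, assuming each batch element is a contiguous block of volume elements (illustrative only):

#include <algorithm>
#include <cstddef>

// out[2 * b] = min of batch b, out[2 * b + 1] = max of batch b.
void min_max_per_batch(const float *in, size_t volume, size_t batches, float *out)
{
    for(size_t b = 0; b < batches; ++b)
    {
        const float *batch = in + b * volume;
        float min_val = batch[0];
        float max_val = batch[0];
        for(size_t i = 1; i < volume; ++i)
        {
            min_val = std::min(min_val, batch[i]);
            max_val = std::max(max_val, batch[i]);
        }
        out[2 * b]     = min_val;
        out[2 * b + 1] = max_val;
    }
}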
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H
-#define ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-using IImage = ITensor;
-
-/** Interface for the kernel to perform min max search on an image. */
-class NEMinMaxKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMinMaxKernel";
- }
- /** Default constructor */
- NEMinMaxKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxKernel(const NEMinMaxKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxKernel(NEMinMaxKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxKernel &operator=(NEMinMaxKernel &&) = delete;
- /** Default destructor */
- ~NEMinMaxKernel() = default;
-
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- */
- void configure(const IImage *input, void *min, void *max);
- /** Resets global minimum and maximum. */
- void reset();
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Performs the min/max algorithm on U8 images on a given window.
- *
- * @param win The window to run the algorithm on.
- */
- void minmax_U8(Window win);
- /** Performs the min/max algorithm on S16 images on a given window.
- *
- * @param win The window to run the algorithm on.
- */
- void minmax_S16(Window win);
- /** Performs the min/max algorithm on F32 images on a given window.
- *
- * @param win The window to run the algorithm on.
- */
- void minmax_F32(Window win);
- /** Common signature for all the specialised MinMax functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using MinMaxFunction = void (NEMinMaxKernel::*)(Window window);
- /** MinMax function to use for the particular image types passed to configure() */
- MinMaxFunction _func;
- /** Helper to update min/max values **/
- template <typename T>
- void update_min_max(T min, T max);
-
- const IImage *_input; /**< Input image. */
- void *_min; /**< Minimum value. */
- void *_max; /**< Maximum value. */
- arm_compute::Mutex _mtx; /**< Mutex used for result reduction. */
-};
-
-/** Interface for the kernel to find min max locations of an image. */
-class NEMinMaxLocationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMinMaxLocationKernel";
- }
- /** Default constructor */
- NEMinMaxLocationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default;
- /** Default destructor */
- ~NEMinMaxLocationKernel() = default;
-
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_loc Array of minimum value locations.
- * @param[out] max_loc Array of maximum value locations.
- * @param[out] min_count Number of minimum value encounters.
- * @param[out] max_count Number of maximum value encounters.
- */
- void configure(const IImage *input, void *min, void *max,
- ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr,
- uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- /** Performs the min/max location algorithm on T type images on a given window.
- *
- * @param win The window to run the algorithm on.
- */
- template <class T, bool count_min, bool count_max, bool loc_min, bool loc_max>
- void minmax_loc(const Window &win);
- /** Common signature for all the specialised MinMaxLoc functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window);
- /** MinMaxLoc function to use for the particular image types passed to configure() */
- MinMaxLocFunction _func;
- /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */
- template <class T, typename>
- struct create_func_table;
-
- const IImage *_input; /**< Input image. */
- void *_min; /**< Minimum value. */
- void *_max; /**< Maximum value. */
- uint32_t *_min_count; /**< Count of minimum value encounters. */
- uint32_t *_max_count; /**< Count of maximum value encounters. */
- ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */
- ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H */
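A minimal sketch of how the two kernels above were typically chained (first the global min/max, then their locations), in the spirit of the NEMinMaxLocation runtime function. The include path, the Tensor/Array/NEScheduler runtime helpers and the sizes are assumptions; filling the image with data is omitted.

#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" // header path prior to this change
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void min_max_location_example()
{
    // U8 image: min/max are reported as int32_t, as per the configure() contract above.
    Tensor image;
    image.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

    int32_t  min_val   = 0;
    int32_t  max_val   = 0;
    uint32_t min_count = 0;
    uint32_t max_count = 0;
    Coordinates2DArray min_loc(64U * 64U); // worst case: every pixel is a minimum
    Coordinates2DArray max_loc(64U * 64U);

    NEMinMaxKernel         min_max;
    NEMinMaxLocationKernel min_max_loc;
    min_max.configure(&image, &min_val, &max_val);
    min_max_loc.configure(&image, &min_val, &max_val, &min_loc, &max_loc, &min_count, &max_count);

    image.allocator()->allocate();

    min_max.reset();                                         // clear the global min/max before each run
    NEScheduler::get().schedule(&min_max, Window::DimY);     // pass 1: find the values
    NEScheduler::get().schedule(&min_max_loc, Window::DimY); // pass 2: find their locations
}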
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENONLINEARFILTERKERNEL_H
-#define ARM_COMPUTE_NENONLINEARFILTERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to apply a non-linear filter */
-class NENonLinearFilterKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NENonLinearFilterKernel";
- }
- /** Default constructor */
- NENonLinearFilterKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete;
- /** Allow instances of this class to be moved */
- NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default;
- /** Allow instances of this class to be moved */
- NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default;
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data type supported: U8
- * @param[out] output Destination tensor. Data type supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Fill mask with the corresponding given pattern.
- *
- * @param[in,out] mask Mask to be filled according to pattern
- * @param[in] cols Columns (width) of mask
- * @param[in] rows Rows (height) of mask
- * @param[in] pattern Pattern to fill the mask according to
- */
- void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern);
- /** Apply a median filter when given mask pattern is defined as box.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void median_filter_box(const Window &win);
- /** Apply a min filter when given mask pattern is defined as box.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void min_filter_box(const Window &win);
- /** Apply a max filter when given mask pattern is defined as box.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void max_filter_box(const Window &win);
- /** Apply a median filter when given mask pattern is defined as cross.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void median_filter_cross(const Window &win);
- /** Apply a min filter when given mask pattern is defined as cross.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void min_filter_cross(const Window &win);
- /** Apply a max filter when given mask pattern is defined as cross.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void max_filter_cross(const Window &win);
- /** Apply a median filter when given mask pattern is defined as disk.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void median_filter_disk(const Window &win);
- /** Apply a min filter when given mask pattern is defined as disk.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void min_filter_disk(const Window &win);
- /** Apply a max filter when given mask pattern is defined as disk.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void max_filter_disk(const Window &win);
- /** Apply a non-linear filter when given mask has user-defined pattern.
- *
- * @param[in] win Window to apply the filter on.
- */
- template <int mask_w, int mask_h>
- void non_linear_filter_generic(const Window &win);
-
-private:
- unsigned int _border_width;
- const ITensor *_input;
- ITensor *_output;
- const uint8_t *_mask;
- MatrixPattern _pattern;
- NonLinearFilterFunction _function;
- unsigned int _func_idx;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NENONLINEARFILTERKERNEL_H */
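A minimal sketch of configuring the kernel above as a 3x3 median filter over a box pattern. The include path and the Tensor/NEScheduler helpers are assumptions; border filling (normally done with a separate fill-border kernel) is omitted, so the border is declared undefined.

#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" // header path prior to this change
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void median_filter_example()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(128U, 128U), 1, DataType::U8));

    // The mask pointer is only read for MatrixPattern::OTHER, so nullptr is passed here.
    NENonLinearFilterKernel filter;
    filter.configure(&src, &dst, NonLinearFilterFunction::MEDIAN, 3, MatrixPattern::BOX, nullptr, true /* border_undefined */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    NEScheduler::get().schedule(&filter, Window::DimY);
}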
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H
-#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON
- *
- * @note Used by @ref NEFastCorners and @ref NEHarrisCorners
- */
-class NENonMaximaSuppression3x3Kernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NENonMaximaSuppression3x3Kernel";
- }
- /** Default constructor */
- NENonMaximaSuppression3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default;
- /** Default destructor */
- ~NENonMaximaSuppression3x3Kernel() = default;
-
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8/F32
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-protected:
- /** Common signature for all the specialised non-maxima suppression 3x3 functions
- *
- * @param[in] input_ptr Pointer to the input tensor.
- * @param[out] output_ptr Pointer to the output tensor
- * @param[in] input_stride Stride of the input tensor
- */
- using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride);
-
- NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32
- */
-class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel
-{
-public:
- const char *name() const override
- {
- return "NENonMaximaSuppression3x3FP16Kernel";
- }
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-};
-#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */
-using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H */
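A minimal sketch of running the 3x3 non-maxima suppression kernel above on an F32 response map (e.g. a Harris corner response). The include path and runtime helpers are assumptions; border handling is omitted and declared undefined.

#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" // header path prior to this change
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void nms_3x3_example()
{
    Tensor response, suppressed;
    response.allocator()->init(TensorInfo(TensorShape(256U, 256U), 1, DataType::F32));
    suppressed.allocator()->init(TensorInfo(TensorShape(256U, 256U), 1, DataType::F32));

    NENonMaximaSuppression3x3Kernel nms;
    nms.configure(&response, &suppressed, true /* border_undefined */);

    response.allocator()->allocate();
    suppressed.allocator()->allocate();

    NEScheduler::get().schedule(&nms, Window::DimY);
}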
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the normalization layer kernel.
- */
-class NENormalizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NENormalizationLayerKernel";
- }
- /** Default constructor */
- NENormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~NENormalizationLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC.
-     * @param[in]  input_squared Source tensor with each element squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * Data type and layout supported: same as @p input.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- */
- void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC.
-     * @param[in] input_squared Source tensor with each element squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * Data type and layout supported: same as @p input.
- * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Function to perform normalization depending on the given template
-     * dimension. The last template parameter specifies whether the
- * normalization has to be 1D or 2D.
- *
- * @note Only supported normalizations are:
- * - 1D over X or Z
- * - 2D over X and Y
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm>
- void normalize_float(const Window &window);
-
- /** Common signature for all the specialised normalization functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window);
-
-private:
- NormalizationFunction _func;
- const ITensor *_input;
- const ITensor *_input_squared;
- ITensor *_output;
- NormalizationLayerInfo _norm_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H */
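A minimal sketch of validating a cross-map (LRN-style) configuration of the kernel above. The shapes and the include path are assumptions; note that input_squared is expected to be the element-wise square of the input and is normally produced by a preceding multiplication.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" // header path prior to this change
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

void check_normalization_config()
{
    // [width, height, IFM] input; input_squared and output share the same shape and type.
    const TensorInfo input(TensorShape(28U, 28U, 32U), 1, DataType::F32);
    const TensorInfo input_squared(TensorShape(28U, 28U, 32U), 1, DataType::F32);
    const TensorInfo output(TensorShape(28U, 28U, 32U), 1, DataType::F32);

    // Cross-map normalization over 5 neighbouring feature maps.
    const NormalizationLayerInfo norm_info(NormType::CROSS_MAP, 5);

    const Status status = NENormalizationLayerKernel::validate(&input, &input_squared, &output, norm_info);
    if(status.error_code() != ErrorCode::OK)
    {
        // Report status.error_description() and bail out.
    }
}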
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPADLAYERKERNEL_H
-#define ARM_COMPUTE_NEPADLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to add padding to a tensor
- *
- * Adds padding to a tensor, given the padding information
- */
-class NEPadLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPadLayerKernel";
- }
- /** Default constructor */
- NEPadLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPadLayerKernel(const NEPadLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPadLayerKernel &operator=(const NEPadLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPadLayerKernel(NEPadLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPadLayerKernel &operator=(NEPadLayerKernel &&) = default;
- /** Default destructor */
- ~NEPadLayerKernel() = default;
-
- /** Initialize the function
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT.
- * Only CONSTANT padding mode is currently supported
- */
- void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer.
- *
- * @param[in] input Source tensor info. Data types supported: All.
- * @param[in] output Output tensor info. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT.
- * Only CONSTANT padding mode is currently supported
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the padding function with constant padding
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_pad_constant(const Window &window);
-
- /** Function to run the padding function with constant padding for 3D input and 1D, 2D, 3D padding
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- void run_pad_constant_uint8_3Dinput_3Dpad(const Window &window);
-
-    /** Common signature for all the specialised padding functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using PadFunctionPtr = void (NEPadLayerKernel::*)(const Window &window);
-
- PadFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- PaddingList _padding;
- PixelValue _constant_value;
- PaddingMode _mode;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPADLAYERKERNEL_H */
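A minimal sketch of padding a 2D F32 tensor by one element on each side of its first two dimensions with the (default) constant padding mode. The include path, runtime helpers and shapes are assumptions.

#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" // header path prior to this change
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void pad_layer_example()
{
    // One element of front and end padding on each of the first two dimensions: 32x32 -> 34x34.
    const PaddingList padding = { { 1, 1 }, { 1, 1 } };

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(34U, 34U), 1, DataType::F32));

    NEPadLayerKernel pad;
    pad.configure(&src, &dst, padding); // default PixelValue() and PaddingMode::CONSTANT

    src.allocator()->allocate();
    dst.allocator()->allocate();

    NEScheduler::get().schedule(&pad, Window::DimY);
}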
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPERMUTEKERNEL_H
-#define ARM_COMPUTE_NEPERMUTEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** NEON kernel to perform tensor permutation.
- *
- * Permutes a tensor according to a given permutation vector
- */
-class NEPermuteKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPermuteKernel";
- }
- /** Default constructor */
- NEPermuteKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPermuteKernel(const NEPermuteKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPermuteKernel &operator=(const NEPermuteKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPermuteKernel(NEPermuteKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPermuteKernel &operator=(NEPermuteKernel &&) = default;
- /** Default destructor */
- ~NEPermuteKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] input The input tensor to permute. Data types supported: All
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const ITensor *input, ITensor *output, const PermutationVector &perm);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEPermuteKernel
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] input The input tensor to permute. Data types supported: All
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Template function to run the permute
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_permute(const Window &window);
-
- /** Common signature for all the specialised permute functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using PermuteFunctionPtr = void (NEPermuteKernel::*)(const Window &window);
-
- PermuteFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- PermutationVector _perm;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPERMUTEKERNEL_H */
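A minimal sketch of permuting an NCHW-shaped tensor to NHWC with the conventional (2, 0, 1) permutation vector. The include path, runtime helpers and shapes are assumptions.

#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" // header path prior to this change
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void permute_example()
{
    // [W, H, C] = [16, 16, 8] becomes [C, W, H] = [8, 16, 16].
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 16U, 16U), 1, DataType::F32));

    NEPermuteKernel permute;
    permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U));

    src.allocator()->allocate();
    dst.allocator()->allocate();

    NEScheduler::get().schedule(&permute, Window::DimY);
}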
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H
-#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform multiplication between two tensors */
-class NEPixelWiseMultiplicationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPixelWiseMultiplicationKernel";
- }
- /** Default constructor */
- NEPixelWiseMultiplicationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default;
- /** Default destructor */
- ~NEPixelWiseMultiplicationKernel() = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * Support: Broadcast? Scale=1/255?
- * - (U8,U8) -> U8, S16 N Y
- * - (U8,S16) -> S16 N Y
- * - (S16,U8) -> S16 N Y
- * - (S16,S16) -> S16 N Y
- * - (S32,S32) -> S32 Y N
- * - (F16,F16) -> F16 N Y
- * - (F32,F32) -> F32 Y Y
- * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y
- * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y
- *
- * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
- * For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
- *
- * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     *                             If @p input1, @p input2 and @p output are all of data type S32, scale cannot be 1/255
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype
- * @param[in] rounding_policy Rounding policy.
- */
- void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel
- *
- * Valid configurations (Input1,Input2) -> Output :
- * Support: Broadcast? Scale=1/255?
- * - (U8,U8) -> U8, S16 N Y
- * - (U8,S16) -> S16 N Y
- * - (S16,U8) -> S16 N Y
- * - (S16,S16) -> S16 N Y
- * - (S32,S32) -> S32 Y N
- * - (F16,F16) -> F16 N Y
- * - (F32,F32) -> F32 Y Y
- * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y
- * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y
- *
- * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
- * For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
- *
- * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     *                             If @p input1, @p input2 and @p output are all of data type S32, scale cannot be 1/255
- * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype
- * @param[in] rounding_policy Rounding policy.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
-
- // Inherited methods overridden
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised multiplication functions with integer scaling factor
- *
- * @param[in] in1 Input1 tensor object.
- * @param[in] in2 Input2 tensor object.
- * @param[out] out Output tensor object.
- * @param[in] window Region on which to execute the kernel
- * @param[in] scale Integer scale factor.
- */
- using MulFunctionInt = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, int scale);
- /** Common signature for all the specialised multiplication functions with float scaling factor
- *
- * @param[in] in1 Input1 tensor object.
- * @param[in] in2 Input2 tensor object.
- * @param[out] out Output tensor object.
- * @param[in] window Region on which to execute the kernel
- * @param[in] scale Float scale factor.
- */
- using MulFunctionFloat = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale);
- /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor
- *
- * @param[in] in1 Input1 tensor object.
- * @param[in] in2 Input2 tensor object.
- * @param[out] out Output tensor object.
- * @param[in] window Region on which to execute the kernel
- * @param[in] scale Float scale factor.
- *
- */
- using MulFunctionQuantized = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale);
-
- MulFunctionFloat *_func_float;
- MulFunctionInt *_func_int;
- MulFunctionQuantized *_func_quantized;
-
-private:
- float _scale;
- int _scale_exponent;
-};
-
-/** Interface for the complex pixelwise multiplication kernel. */
-class NEComplexPixelWiseMultiplicationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEComplexPixelWiseMultiplicationKernel";
- }
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- */
- void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplicationKernel
- *
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-};
-
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H */
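Since this kernel follows the newer run_op()/ITensorPack interface, running it requires packing the tensors at execution time; the sketch below therefore only exercises the validate() path for an F32 multiplication with unit scale. The include path and shapes are assumptions.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" // header path prior to this change
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

void check_pixel_wise_multiplication_config()
{
    const TensorInfo input1(TensorShape(64U, 64U), 1, DataType::F32);
    const TensorInfo input2(TensorShape(64U, 64U), 1, DataType::F32);
    const TensorInfo output(TensorShape(64U, 64U), 1, DataType::F32);

    // Scale of 1 (i.e. 1/2^0), saturating overflow policy, round towards zero.
    const Status status = NEPixelWiseMultiplicationKernel::validate(&input1, &input2, &output, 1.f,
                                                                    ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
    if(status.error_code() != ErrorCode::OK)
    {
        // Report status.error_description() and bail out.
    }
}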
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the pooling layer kernel */
-class NEPoolingLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPoolingLayerKernel";
- }
- /** Default constructor */
- NEPoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default;
- /** Default destructor */
- ~NEPoolingLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @note F16 are supported for pool sizes 2 and 3 only
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
- */
- void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel
- *
- * @note F16 are supported for pool sizes 2 and 3 only
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Function to perform 2x2 pooling.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- void pooling2_f32_nhwc_maxpool_indices(const Window &window_input, const Window &window);
- /** Function to perform MxN pooling for 32-bit floating point values.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void poolingMxN_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform MxN pooling for 32-bit floating point values (NHWC).
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void poolingMxN_f32_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 7x7 pooling.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling7_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 3x3 pooling.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 2x2 pooling for float16_t.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling2_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform 2x2 pooling and compute the pooling indices for FP32/FP16. The indices can be used for max unpool.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- template <typename T>
- void pooling2_nchw_maxpool_indices(const Window &window_input, const Window &window);
- /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- */
- void pooling2_f16_nhwc_maxpool_indices(const Window &window_input, const Window &window);
- /** Function to perform 3x3 pooling.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void pooling3_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform MxN pooling for 16-bit floating point values.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void poolingMxN_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Function to perform MxN pooling for 16-bit floating point values. (NHWC)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- void poolingMxN_f16_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
-    /** Template function to perform 2x2 pooling for 8-bit quantized fixed point. (NCHW)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- template <typename T>
- void pooling2_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
-    /** Template function to perform 3x3 pooling for 8-bit quantized fixed point. (NCHW)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- template <typename T>
- void pooling3_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Template function to perform MxN pooling for 8-bit quantized. (NCHW)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- template <typename T>
- void poolingMxN_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Template function to perform MxN pooling for 8-bit quantized. (NHWC)
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- template <typename T>
- void poolingMxN_q8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
- /** Common signature for all the specialised Pooling functions
- *
- * @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
- * @param[in] pooling_type Pooling operation to be computed.
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
- */
- using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding);
-
-private:
- PoolingFunction _func;
- const ITensor *_input;
- ITensor *_output;
- ITensor *_indices;
- PoolingLayerInfo _pool_info;
- DataLayout _data_layout;
- unsigned int _num_elems_processed_per_iteration;
- BorderSize _border_size;
- bool _is_square;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H */
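A minimal sketch of validating a 2x2, stride-2 max-pooling configuration on an NCHW F32 tensor. The include path and shapes are assumptions, and the PoolingLayerInfo constructor shown (pool type, pool size, data layout, pad/stride) reflects the 2020-era API; it has changed across releases, so check the definition in arm_compute/core/Types.h for the revision you target.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" // header path prior to this change
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

void check_pooling_config()
{
    // 2x2 max pooling with stride 2 and no padding: 32x32x16 -> 16x16x16.
    const TensorInfo input(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    const TensorInfo output(TensorShape(16U, 16U, 16U), 1, DataType::F32);

    const PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0));

    const Status status = NEPoolingLayerKernel::validate(&input, &output, pool_info);
    if(status.error_code() != ErrorCode::OK)
    {
        // Report status.error_description() and bail out.
    }
}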
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H
-#define ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to calculate prior boxes */
-class NEPriorBoxLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEPriorBoxLayerKernel";
- }
- /** Default constructor */
- NEPriorBoxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPriorBoxLayerKernel(const NEPriorBoxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPriorBoxLayerKernel &operator=(const NEPriorBoxLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPriorBoxLayerKernel(NEPriorBoxLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPriorBoxLayerKernel &operator=(NEPriorBoxLayerKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
-     * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1
- * @param[in] info Prior box layer info.
- */
- void configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPriorBoxLayerKernel
- *
- * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1
-     * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1
- * @param[in] info Prior box layer info.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Stores the coordinates of the calculated prior boxes.
- *
- * @param[out] out Output pointer.
- * @param[in] offset Output offset to write to.
- * @param[in] center_x Center pixel value on x-axis.
- * @param[in] center_y Center pixel value on y-axis.
- * @param[in] box_width Prior box width.
- * @param[in] box_height Prior box height.
- * @param[in] width Input width.
- * @param[in] height Input height.
- */
- void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height);
- /** Function to calculate prior boxes.
- *
- * @param[in] window Input region on which to execute the kernel.
- */
- void calculate_prior_boxes(const Window &window);
-
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
- PriorBoxLayerInfo _info;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H */
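A minimal sketch of validating a prior-box configuration for an 8x8 feature map against a 128x128 image. With a single min size and no extra aspect ratios there is one prior per cell, so the output shape is [8 * 8 * 1 * 4, 2]. The include path, shapes and in particular the PriorBoxLayerInfo constructor arguments (min sizes, variances, offset) are assumptions about the 2020-era API rather than code from this change.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" // header path prior to this change
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

void check_prior_box_config()
{
    const TensorInfo feature_map(TensorShape(8U, 8U, 256U), 1, DataType::F32);
    const TensorInfo image(TensorShape(128U, 128U, 3U), 1, DataType::F32);
    const TensorInfo output(TensorShape(256U, 2U), 1, DataType::F32);

    const PriorBoxLayerInfo info({ 64.f } /* min_sizes */, { 0.1f, 0.1f, 0.2f, 0.2f } /* variances */, 0.5f /* offset */);

    const Status status = NEPriorBoxLayerKernel::validate(&feature_map, &image, &output, info);
    if(status.error_code() != ErrorCode::OK)
    {
        // Report status.error_description() and bail out.
    }
}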
+++ /dev/null
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include <functional>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform layer normalization */
-class NEQLSTMLayerNormalizationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEQLSTMLayerNormalizationKernel";
- }
- /** Default constructor */
- NEQLSTMLayerNormalizationKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEQLSTMLayerNormalizationKernel(const NEQLSTMLayerNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEQLSTMLayerNormalizationKernel &operator=(const NEQLSTMLayerNormalizationKernel &) = delete;
- /** Default Move Constructor. */
- NEQLSTMLayerNormalizationKernel(NEQLSTMLayerNormalizationKernel &&) = default;
- /** Default move assignment operator */
- NEQLSTMLayerNormalizationKernel &operator=(NEQLSTMLayerNormalizationKernel &&) = default;
- /** Default destructor */
- ~NEQLSTMLayerNormalizationKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QSYMM16.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] weight Weight tensor. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor. Data types supported: S32
- */
- void configure(const ITensor *input, ITensor *output, const ITensor *weight, const ITensor *bias);
- /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayerNormalizationKernel
- *
- * @param[in] input Source tensor info. Data types supported: QSYMM16.
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] weight Weight tensor info. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor info. Data types supported: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias);
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- // constants
- static constexpr uint32_t max_input_dimension{ 2 }; /**< The maximum input dimension supported */
- static constexpr uint32_t max_weight_dimension{ 1 }; /**< The maximum weight dimension supported */
- static constexpr uint32_t max_bias_dimension{ 1 }; /**< The maximum bias dimension supported */
- static constexpr uint32_t vector_size_byte{ 16 }; /**< Computation vector size in byte */
-
- using ComputeFuncType = std::function<void(NEQLSTMLayerNormalizationKernel &)>;
-
- ComputeFuncType _fn{}; /**< Function pointer to computation function */
-
- const ITensor *_input{ nullptr }; /**< Input tensor */
- const ITensor *_weight{ nullptr }; /**< Weight tensor */
- const ITensor *_bias{ nullptr }; /**< Bias tensor */
- ITensor *_output{ nullptr }; /**< Output tensor */
-
- int32_t _output_multiplier{}; /**< Multiplier for output values */
- int32_t _output_shift{}; /**< Shift value for output values */
-
- int32_t _window_start_x{}; /**< The beginning of x-axis iteration */
- int32_t _window_end_x{}; /**< The end of x-axis iteration */
- int32_t _window_step_x{}; /**< The size of x-axis iteration's step */
-
- Window _inout_window{}; /**< Window for input and output tensor */
- Window _weight_window{}; /**< Window for weight and bias tensor */
-
- /** Function to configure initial windows for destination of computation
- *
- * @param[in] target Destination tensor to use for the output window
- *
- * @return configured window
- */
- Window configure_window(ITensor *target);
- // Function to compute for data type QSYMM16
- void compute_qsymm16();
- /** Function to compute the sum and the sum of squares of the values at the given input pointer
- *
- * @param[in] input_ptr Pointer to the input array
- *
- * @return a pair containing the sum and the sum of squares of the input values
- */
- std::pair<int64_t, int64_t> sum_qsymm16(const int16_t *input_ptr);
- /** Function to normalize values using computed mean and standard deviation
- *
- * @param[in] input_ptr Pointer to input array
- * @param[in] output_ptr Pointer to output array
- * @param[in] weight_ptr Pointer to weight array
- * @param[in] bias_ptr Pointer to bias array
- * @param[in] mean Mean value
- * @param[in] inv_std_mul Quantized multiplier for standard deviation
- * @param[in] inv_std_shift Shift for standard deviation
- *
- */
- void normalize_qasymm16(const int16_t *input_ptr,
- int16_t *output_ptr,
- const int16_t *weight_ptr,
- const int32_t *bias_ptr,
- int32_t mean, int32_t inv_std_mul, int32_t inv_std_shift);
- /** Function to compute output quantization information */
- QuantizationInfo compute_output_qinfo();
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H */
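For reference, a minimal usage sketch of the kernel deleted above, assuming the pre-move public include path; the shapes and quantization parameters are illustrative assumptions and do not come from this patch:

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void qlstm_layer_norm_sketch()
{
    // 64 cell units, 2 batches; QSYMM16 activations, 1-D QSYMM16 weight, 1-D S32 bias
    Tensor input, output, weight, bias;
    input.allocator()->init(TensorInfo(TensorShape(64U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f)));
    output.allocator()->init(TensorInfo(TensorShape(64U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f)));
    weight.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f)));
    bias.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::S32));

    NEQLSTMLayerNormalizationKernel norm;
    ARM_COMPUTE_ERROR_THROW_ON(NEQLSTMLayerNormalizationKernel::validate(input.info(), output.info(), weight.info(), bias.info()));
    norm.configure(&input, &output, &weight, &bias);

    input.allocator()->allocate();
    weight.allocator()->allocate();
    bias.allocator()->allocate();
    output.allocator()->allocate();
    // ... fill input, weight and bias ...
    NEScheduler::get().schedule(&norm, Window::DimY);
}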
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the quantization layer kernel.
- *
- * @note The implementation supports only 3D input tensors
- *
- */
-class NEQuantizationLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEQuantizationLayerKernel";
- }
- /** Default constructor */
- NEQuantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEQuantizationLayerKernel(const NEQuantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default;
- /** Default destructor */
- ~NEQuantizationLayerKernel() = default;
- /** Set the input, output.
- *
- * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @note Output auto initialization is not supported by this kernel
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window);
- /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor.
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename TIn, typename TOut>
- void run_quantize_qasymm8(const Window &window);
- /** Function to apply QASYMM16 quantization on a tensor.
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename T>
- void run_quantize_qasymm16(const Window &window);
-
- const ITensor *_input;
- ITensor *_output;
-
- QuantizationFunctionExecutorPtr _func;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H */
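A minimal sketch of driving the removed quantization kernel; the scale/offset, shapes and include path are illustrative assumptions. Since the kernel does not auto-initialize its output, the destination info must already carry the target quantization:

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void quantization_sketch()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));
    // Target quantization must be set on the output info up front
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 128)));

    NEQuantizationLayerKernel quantize;
    ARM_COMPUTE_ERROR_THROW_ON(NEQuantizationLayerKernel::validate(src.info(), dst.info()));
    quantize.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src ...
    NEScheduler::get().schedule(&quantize, Window::DimY);
}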
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H
-#define ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the RoIAlign kernel.
- */
-class NEROIAlignLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEROIAlignLayerKernel";
- }
-
- /** Constructor */
- NEROIAlignLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEROIAlignLayerKernel(const NEROIAlignLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEROIAlignLayerKernel &operator=(const NEROIAlignLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEROIAlignLayerKernel(NEROIAlignLayerKernel &&) = default;
- /** Default move assignment operator. */
- NEROIAlignLayerKernel &operator=(NEROIAlignLayerKernel &&) = default;
- /** Default destructor */
- ~NEROIAlignLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
- * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
- * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor.
- */
- void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
- * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8,
- * otherwise same as @p input
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <DataLayout data_layout, typename input_data_type, typename roi_data_type = input_data_type>
- void internal_run(const Window &window, const ThreadInfo &info);
-
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_rois;
- ROIPoolingLayerInfo _pool_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H*/
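A validate-only sketch of the removed ROI align kernel, showing the documented [5, N] ROI layout; the shapes, spatial scale and ROIPoolingLayerInfo arguments are illustrative assumptions:

#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

Status roi_align_validate_sketch()
{
    // NCHW feature map of 128x128x16, N = 4 ROIs, each pooled to 7x7
    const TensorInfo input(TensorShape(128U, 128U, 16U), 1, DataType::F32);
    const TensorInfo rois(TensorShape(5U, 4U), 1, DataType::F32);            // [5, N]: batch_id, x1, y1, x2, y2
    TensorInfo       output(TensorShape(7U, 7U, 16U, 4U), 1, DataType::F32); // [pooled_w, pooled_h, C, N]
    const ROIPoolingLayerInfo pool_info(7U, 7U, 0.03125f /* spatial scale */);
    return NEROIAlignLayerKernel::validate(&input, &rois, &output, pool_info);
}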
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-#include "arm_compute/core/IArray.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the ROI pooling layer kernel */
-class NEROIPoolingLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEROIPoolingLayerKernel";
- }
- /** Default constructor */
- NEROIPoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEROIPoolingLayerKernel(const NEROIPoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEROIPoolingLayerKernel &operator=(const NEROIPoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEROIPoolingLayerKernel(NEROIPoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEROIPoolingLayerKernel &operator=(NEROIPoolingLayerKernel &&) = default;
- /** Default destructor */
- ~NEROIPoolingLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32.
- * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
- * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor.
- */
- void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- const ITensor *_rois;
- ITensor *_output;
- ROIPoolingLayerInfo _pool_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H */
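The ROI pooling kernel uses the same [5, N] ROI layout but with U16 coordinates and no validate() helper; a minimal configure sketch under the same illustrative assumptions as above:

#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void roi_pooling_sketch()
{
    Tensor input, rois, output;
    input.allocator()->init(TensorInfo(TensorShape(128U, 128U, 16U), 1, DataType::F32));
    rois.allocator()->init(TensorInfo(TensorShape(5U, 4U), 1, DataType::U16));            // [5, N], integer coordinates
    output.allocator()->init(TensorInfo(TensorShape(7U, 7U, 16U, 4U), 1, DataType::F32)); // [pooled_w, pooled_h, C, N]

    NEROIPoolingLayerKernel roi_pool;
    roi_pool.configure(&input, &rois, &output, ROIPoolingLayerInfo(7U, 7U, 0.03125f));
}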
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NERANGEKERNEL_H
-#define ARM_COMPUTE_NERANGEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Kernel class for Range
- *
- * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments
- * of 'step' up to but not including 'end'.
- */
-class NERangeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NERangeKernel";
- }
- /** Default constructor */
- NERangeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERangeKernel(const NERangeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERangeKernel &operator=(const NERangeKernel &) = delete;
- /** Allow instances of this class to be moved */
- NERangeKernel(NERangeKernel &&) = default;
- /** Allow instances of this class to be moved */
- NERangeKernel &operator=(NERangeKernel &&) = default;
- /** Default destructor */
- ~NERangeKernel() = default;
- /** Initialize the kernel's output tensor, start, end and step of the sequence.
- *
- * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending (not including) value of the sequence.
- * @param[in] step The gap between each pair of values in the sequence.
- */
- void configure(ITensor *output, float start, float end, float step);
- /** Static function to check if given info will lead to a valid configuration of @ref NERangeKernel
- *
- * @param[in] output Output tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending (not including) value of the sequence.
- * @param[in] step The gap between each pair of values in the sequence.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *output, float start, float end, float step);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using RangeFunction = void(ITensor *output, float start, float step, const Window &window);
-
- RangeFunction *_func; /**< Range function to be called */
- float _start; /**< Start of sequence */
- float _end; /**< End of sequence */
- float _step; /**< Increment/step value */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NERANGEKERNEL_H */
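The output length must match the sequence the kernel generates, i.e. ceil((end - start) / step) elements. A small validate sketch with illustrative values:

#include "arm_compute/core/NEON/kernels/NERangeKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

Status range_validate_sketch()
{
    // start = 0, end = 10, step = 2 generates {0, 2, 4, 6, 8}:
    // number of elements = ceil((end - start) / step) = ceil(10 / 2) = 5
    const TensorInfo output(TensorShape(5U), 1, DataType::F32);
    return NERangeKernel::validate(&output, 0.f, 10.f, 2.f);
}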
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H
-#define ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a reduction operation
- *
- * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized
- * output tensor is signed 32-bit integer (S32). It is the user's responsibility
- * to check that the results do not overflow because the indices are computed
- * in unsigned 32-bit (U32).
- */
-class NEReductionOperationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEReductionOperationKernel";
- }
- /** Default constructor */
- NEReductionOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReductionOperationKernel(const NEReductionOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReductionOperationKernel &operator=(const NEReductionOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEReductionOperationKernel(NEReductionOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEReductionOperationKernel &operator=(NEReductionOperationKernel &&) = default;
- /** Default destructor */
- ~NEReductionOperationKernel() = default;
-
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
- * @param[in] op Reduction operation to perform.
- */
- void configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel.
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW.
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
- * @param[in] op Reduction operation to perform.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- unsigned int _reduction_axis;
- ReductionOperation _op;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H */
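A validate-only sketch of a sum reduction along axis 0; the shapes are illustrative assumptions. The output keeps the input rank, with the reduced dimension collapsed to 1:

#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

Status reduction_validate_sketch()
{
    const TensorInfo input(TensorShape(16U, 8U, 4U), 1, DataType::F32);
    const TensorInfo output(TensorShape(1U, 8U, 4U), 1, DataType::F32); // dimension 0 reduced to 1
    return NEReductionOperationKernel::validate(&input, &output, 0U, ReductionOperation::SUM);
}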
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREMAPKERNEL_H
-#define ARM_COMPUTE_NEREMAPKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a remap on a tensor */
-class NERemapKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NERemapKernel";
- }
- /** Default constructor */
- NERemapKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERemapKernel(const NERemapKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERemapKernel &operator=(const NERemapKernel &) = delete;
- /** Allow instances of this class to be moved */
- NERemapKernel(NERemapKernel &&) = default;
- /** Allow instances of this class to be moved */
- NERemapKernel &operator=(NERemapKernel &&) = default;
- /** Default destructor */
- ~NERemapKernel() = default;
-
- /** Initialize the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[in] map_x Map for X coordinates. Data type supported: F32.
- * @param[in] map_y Map for Y coordinates. Data type supported: F32.
- * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- */
- void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** function to perform nearest interpolation on the given window */
- void remap_nearest(const Window &window);
- /** function to perform bilinear interpolation on the given window */
- void remap_bilinear(const Window &window);
- /** Remap function to use for the particular interpolation type passed to configure() */
- void (NERemapKernel::*_func)(const Window &window);
-
- const ITensor *_input; /**< Input image */
- ITensor *_output; /**< Output image */
- const ITensor *_map_x; /**< Input remap x coordinates */
- const ITensor *_map_y; /**< Input remap y coordinates */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */
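A minimal configure sketch of the removed remap kernel: one F32 coordinate per output pixel in each map, U8 input and output. Shapes and the include path are illustrative assumptions:

#include "arm_compute/core/NEON/kernels/NERemapKernel.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void remap_sketch()
{
    Tensor src, map_x, map_y, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    // dst(x, y) = src(map_x(x, y), map_y(x, y))
    map_x.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
    map_y.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));

    NERemapKernel remap;
    remap.configure(&src, &map_x, &map_y, &dst, InterpolationPolicy::BILINEAR);
}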
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREORGLAYERKERNEL_H
-#define ARM_COMPUTE_NEREORGLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to perform tensor re-organization */
-class NEReorgLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEReorgLayerKernel";
- }
- /** Default constructor */
- NEReorgLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReorgLayerKernel(const NEReorgLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReorgLayerKernel &operator=(const NEReorgLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEReorgLayerKernel(NEReorgLayerKernel &&) = default;
- /** Default move assignment operator */
- NEReorgLayerKernel &operator=(NEReorgLayerKernel &&) = default;
- /** Default destructor */
- ~NEReorgLayerKernel() = default;
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Data type supported: All
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] stride Stride to be used during data re-organization.
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction
- */
- void configure(const ITensor *input, ITensor *output, int32_t stride);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEReorgLayerKernel
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] stride Stride to be used during data re-organization
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- int32_t _stride;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEREORGLAYERKERNEL_H */
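Reorg moves each stride x stride spatial block into the channel dimension, so [W, H, C] becomes [W/stride, H/stride, C*stride*stride]. A validate sketch with stride 2 and illustrative shapes:

#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

Status reorg_validate_sketch()
{
    // [W, H, C] = [8, 8, 4] with stride 2 -> [4, 4, 16]
    const TensorInfo input(TensorShape(8U, 8U, 4U), 1, DataType::F32);
    const TensorInfo output(TensorShape(4U, 4U, 16U), 1, DataType::F32);
    return NEReorgLayerKernel::validate(&input, &output, 2);
}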
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NERESHAPELAYERKERNEL_H
-#define ARM_COMPUTE_NERESHAPELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to perform tensor reshaping */
-class NEReshapeLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEReshapeLayerKernel";
- }
- /** Set the input and output info of the kernel
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
- */
- void configure(const ITensorInfo *input, ITensorInfo *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */
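Unlike the kernels above, this one is configured from tensor info only; the actual tensors are bound at run time through the ITensorPack handed to run_op(). A configure/validate sketch with illustrative shapes:

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

void reshape_configure_sketch()
{
    TensorInfo src(TensorShape(8U, 4U, 2U), 1, DataType::F32);
    TensorInfo dst(TensorShape(64U), 1, DataType::F32); // same element count, different shape

    NEReshapeLayerKernel reshape;
    ARM_COMPUTE_ERROR_THROW_ON(NEReshapeLayerKernel::validate(&src, &dst));
    reshape.configure(&src, &dst);
    // The source and destination tensors are supplied later via an ITensorPack when run_op() is called.
}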
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEREVERSEKERNEL_H
-#define ARM_COMPUTE_NEREVERSEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the reverse layer kernel. */
-class NEReverseKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEReverseKernel";
- }
- /** Default constructor */
- NEReverseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReverseKernel(const NEReverseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReverseKernel &operator=(const NEReverseKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEReverseKernel(NEReverseKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEReverseKernel &operator=(NEReverseKernel &&) = default;
- /** Default destructor */
- ~NEReverseKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
- */
- void configure(const ITensor *input, ITensor *output, const ITensor *axis);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_axis;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEREVERSEKERNEL_H */
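A minimal configure sketch of the removed reverse kernel; the axis tensor is a 1-D U32 list of dimensions to flip. Shapes and values are illustrative assumptions:

#include "arm_compute/core/NEON/kernels/NEReverseKernel.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void reverse_sketch()
{
    Tensor src, dst, axis;
    src.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
    // Single-element U32 axis tensor, e.g. holding {0} to flip along the x dimension
    axis.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::U32));

    NEReverseKernel reverse;
    reverse.configure(&src, &dst, &axis);
}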
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESCALEKERNEL_H
-#define ARM_COMPUTE_NESCALEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform scaling on a tensor */
-class NEScaleKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEScaleKernel";
- }
- /** Default constructor */
- NEScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEScaleKernel(const NEScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEScaleKernel &operator=(const NEScaleKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEScaleKernel(NEScaleKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEScaleKernel &operator=(NEScaleKernel &&) = default;
- /** Default destructor */
- ~NEScaleKernel() = default;
-
- /** Initialise the kernel's inputs, output and interpolation policy
- *
- * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
- * @note Using @p policy Area only supports data layout NCHW and input data type U8.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
- * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info @ref ScaleKernelInfo to use for configuration
- */
- void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output,
- const ScaleKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEScaleKernel
- *
- * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
- * @note Using @p policy Area only supports data layout NCHW and input data type U8.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
- * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
- * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info @ref ScaleKernelInfo to use for validation
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *output,
- const ScaleKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** function to perform scale using area interpolation on the given window
- *
- * @note Used only in case down-sampling.
- */
- void scale_area_nchw_u8(const Window &window);
-
- /** function to perform scale using bilinear interpolation on the given window */
- template <typename T>
- void scale_bilinear_nchw(const Window &window);
- /** function to perform scale using bilinear interpolation on the given window */
- template <typename T>
- void scale_bilinear_nhwc(const Window &window);
- /** function to perform scale using bilinear interpolation on the given window */
- template <typename T>
- void scale_bilinear_qasymm(const Window &window);
-
- /** function to perform scale using nearest neighbour on the given window */
- template <typename T>
- void scale_nearest_nchw(const Window &window);
- /** function to perform scale using nearest neighbour on the given window */
- template <typename T>
- void scale_nearest_nhwc(const Window &window);
-
- /** Scale function to use for the particular function to use */
- using ScaleFunctionPtr = void (NEScaleKernel::*)(const Window &window);
-
- ScaleFunctionPtr _func;
- const ITensor *_offsets;
- const ITensor *_dx;
- const ITensor *_dy;
- const ITensor *_input;
- ITensor *_output;
- InterpolationPolicy _policy;
- BorderMode _border_mode;
- PixelValue _constant_border_value;
- float _sampling_offset;
- bool _align_corners;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESCALEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESCHARR3x3KERNEL_H
-#define ARM_COMPUTE_NESCHARR3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run a 3x3 Scharr filter on a tensor.
- *
-* @f[
-* \mathbf{G}_x=\begin{vmatrix}
-* -3 & 0 & +3\\
-* -10& 0 & +10\\
-* -3 & 0 & +3
-* \end{vmatrix}
-* @f]
-*/
-class NEScharr3x3Kernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEScharr3x3Kernel";
- }
- /** Default constructor */
- NEScharr3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default;
- /** Default destructor */
- ~NEScharr3x3Kernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- bool _run_scharr_x; /**< Do we need to run Scharr X ? */
- bool _run_scharr_y; /**< Do we need to run Scharr Y ? */
- const ITensor *_input; /**< Input tensor */
- ITensor *_output_x; /**< Output tensor for scharr X */
- ITensor *_output_y; /**< Output tensor for scharr Y */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESCHARR3x3KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESELECTKERNEL_H
-#define ARM_COMPUTE_NESELECTKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the select kernel
- *
- * Select is computed by:
- * @f[ output(i) = condition(i) ? x(i) : y(i) @f]
- *
- */
-class NESelectKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESelectKernel";
- }
- /** Default constructor */
- NESelectKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESelectKernel(const NESelectKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESelectKernel &operator=(const NESelectKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESelectKernel(NESelectKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESelectKernel &operator=(NESelectKernel &&) = default;
- /** Default destructor */
- ~NESelectKernel() = default;
-
- /** Initialise the kernel's inputs and output
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[out] output Output tensor. Data types supported: Same as @p x
- */
- void configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output);
-
- /** Validate the argument passed to the kernel
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[in] output Output tensor. Data types supported: Same as @p x.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised select functions
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[in] output Output tensor. Data types supported: Same as @p x.
- */
- using SelectFunction = void(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window);
-
- /** Select function to use for the particular tensor types passed to configure() */
- SelectFunction *_function;
- const ITensor *_c; /**< Condition tensor */
- const ITensor *_x; /**< Source tensor 1 */
- const ITensor *_y; /**< Source tensor 2 */
- ITensor *_output; /**< Destination tensor */
- bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESELECTKERNEL_H */
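A validate-only sketch of the removed select kernel, computing output(i) = c(i) ? x(i) : y(i) with a same-shape U8 condition; shapes are illustrative assumptions:

#include "arm_compute/core/NEON/kernels/NESelectKernel.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

Status select_validate_sketch()
{
    // U8 condition with the same shape as x and y in this sketch
    const TensorInfo c(TensorShape(16U, 8U), 1, DataType::U8);
    const TensorInfo x(TensorShape(16U, 8U), 1, DataType::F32);
    const TensorInfo y(TensorShape(16U, 8U), 1, DataType::F32);
    const TensorInfo output(TensorShape(16U, 8U), 1, DataType::F32);
    return NESelectKernel::validate(&c, &x, &y, &output);
}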
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL3x3KERNEL_H
-#define ARM_COMPUTE_NESOBEL3x3KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
- /** Interface for the kernel to run a 3x3 Sobel filter (X and/or Y gradient) on a tensor.
- *
- * @f[
- * \mathbf{G}_x=\begin{vmatrix}
- * -1 & 0 & +1\\
- * -2 & 0 & +2\\
- * -1 & 0 & +1
- * \end{vmatrix}
- * @f]
-*/
-class NESobel3x3Kernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel3x3Kernel";
- }
- /** Default constructor */
- NESobel3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel3x3Kernel(const NESobel3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel3x3Kernel(NESobel3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default;
- /** Default destructor */
- ~NESobel3x3Kernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
- const ITensor *_input; /**< Input tensor */
- ITensor *_output_x; /**< Output tensor for sobel X */
- ITensor *_output_y; /**< Output tensor for sobel Y */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESOBEL3x3KERNEL_H */
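A minimal configure sketch of the removed 3x3 Sobel kernel requesting both gradients with an undefined border; the shape and include path are illustrative assumptions:

#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void sobel3x3_sketch()
{
    Tensor src, gx, gy;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    gx.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::S16));
    gy.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::S16));

    NESobel3x3Kernel sobel;
    // Both gradients requested; the border is treated as undefined, so edge pixels are not computed.
    sobel.configure(&src, &gx, &gy, true);
}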
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL5x5KERNEL_H
-#define ARM_COMPUTE_NESOBEL5x5KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor.
- *
- */
-class NESobel5x5HorKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel5x5HorKernel";
- }
- /** Default constructor */
- NESobel5x5HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default;
- /** Default destructor */
- ~NESobel5x5HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input; /**< Input tensor */
- ITensor *_output_x; /**< X output of horizontal pass */
- ITensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of the 5x5 Sobel filter on a tensor.
- *
-*/
-class NESobel5x5VertKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel5x5VertKernel";
- }
- /** Default constructor */
- NESobel5x5VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default;
- /** Default destructor */
- ~NESobel5x5VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16.
- * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16.
- * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16.
- * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- ITensor *_input_x; /**< X input (X output of the hor pass) */
- ITensor *_input_y; /**< Y input (Y output of the hor pass) */
- ITensor *_output_x; /**< X output of sobel */
- ITensor *_output_y; /**< Y output of sobel */
- bool _run_sobel_x; /**< Do we need to run sobel X? */
- bool _run_sobel_y; /**< Do we need to run sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESOBEL5x5KERNEL_H */
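// Sketch of the two-pass structure used by the 5x5 Sobel kernels above (and, analogously, the
// 7x7 kernels below): the horizontal kernel convolves rows with a 1x5 vector and the vertical
// kernel convolves the intermediate result column-wise. The coefficient vectors below are the
// commonly used separable Sobel factors for the X gradient and are an assumption made for
// illustration, not taken from the removed implementation.
#include <cstdint>
#include <vector>

static std::vector<int32_t> sobel5x5_x_reference(const std::vector<uint8_t> &src, int width, int height)
{
    static const int hor[5]  = { -1, -2, 0, +2, +1 }; // derivative along x (horizontal pass)
    static const int vert[5] = { 1, 4, 6, 4, 1 };     // smoothing along y (vertical pass)

    std::vector<int32_t> tmp(src.size(), 0);
    std::vector<int32_t> dst(src.size(), 0);

    // Horizontal pass
    for(int y = 0; y < height; ++y)
    {
        for(int x = 2; x < width - 2; ++x)
        {
            int32_t sum = 0;
            for(int k = -2; k <= 2; ++k)
            {
                sum += hor[k + 2] * src[y * width + x + k];
            }
            tmp[y * width + x] = sum;
        }
    }
    // Vertical pass
    for(int y = 2; y < height - 2; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            int32_t sum = 0;
            for(int k = -2; k <= 2; ++k)
            {
                sum += vert[k + 2] * tmp[(y + k) * width + x];
            }
            dst[y * width + x] = sum;
        }
    }
    return dst;
}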
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOBEL7x7KERNEL_H
-#define ARM_COMPUTE_NESOBEL7x7KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor.
- *
- */
-class NESobel7x7HorKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel7x7HorKernel";
- }
- /** Default constructor */
- NESobel7x7HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default;
- /** Default destructor */
- ~NESobel7x7HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input; /**< Input tensor */
- ITensor *_output_x; /**< X output of horizontal pass */
- ITensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of the 7x7 Sobel filter on a tensor.
- *
-*/
-class NESobel7x7VertKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESobel7x7VertKernel";
- }
- /** Default constructor */
- NESobel7x7VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default;
- /** Default destructor */
- ~NESobel7x7VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @note At least one of output_x or output_y must be set
- * @note If output_x is set then input_x must be set too
- * @note If output_y is set then input_y must be set too
- *
- * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32.
- * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32.
- * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input_x; /**< X input (X output of the hor pass) */
- const ITensor *_input_y; /**< Y input (Y output of the hor pass) */
- ITensor *_output_x; /**< X output of sobel */
- ITensor *_output_y; /**< Y output of sobel */
- bool _run_sobel_x; /**< Do we need to run sobel X? */
- bool _run_sobel_y; /**< Do we need to run sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESOBEL7x7KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H
-#define ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for identifying the max value of 1D Logits */
-class NELogits1DMaxKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NELogits1DMaxKernel";
- }
- /** Default constructor */
- NELogits1DMaxKernel();
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- using Logits1DMaxFunction = void(const ITensor &in, ITensor &out, const Window &window);
-
-private:
- Logits1DMaxFunction *_func;
- BorderSize _border_size;
-};
-
-/** Interface for the softmax (or log-softmax, when IS_LOG is true) computation with a pre-computed max. */
-template <bool IS_LOG = false>
-class NELogits1DSoftmaxKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- if(IS_LOG)
- {
- return "NELogits1DLogSoftmaxKernel";
- }
- else
- {
- return "NELogits1DSoftmaxKernel";
- }
- }
- /** Default constructor */
- NELogits1DSoftmaxKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete;
- /** Allow instances of this class to be moved */
- NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default;
- /** Allow instances of this class to be moved */
- NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default;
- /** Default destructor */
- ~NELogits1DSoftmaxKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1.
- * Data types supported: same as @p input.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] beta A scaling factor for the exponent.
- * @param[out] tmp Auxiliary tensor used as temporary workspace. Must be of type F32 and of the same shape as the input.
- */
- void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp);
- /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1.
- * Data types supported: same as @p input.
- * @param[in] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] beta A scaling factor for the exponent.
- * @param[in] tmp Auxiliary tensor info. Must be of type F32 and of the same shape as the input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *max,
- const ITensorInfo *output, const float beta, const ITensorInfo *tmp);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using LogitsSoftmaxFunction = void(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta,
- const Window &window);
-
- LogitsSoftmaxFunction *_func;
- const ITensor *_input;
- const ITensor *_max;
- ITensor *_output;
- float _beta;
- ITensor *_tmp; /**< Auxiliary tensor used internally as temporary workspace */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H */
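// Scalar sketch of the per-row computation that the two kernels above split between them:
// NELogits1DMaxKernel produces the row maximum, and the softmax kernel then computes
// exp(beta * (x - max)) followed by a normalisation by the sum. F32, one non-empty logits row;
// illustrative only, and the helper name is made up for the example.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

static std::vector<float> softmax_row_reference(const std::vector<float> &logits, float beta, bool is_log = false)
{
    const float max_val = *std::max_element(logits.begin(), logits.end());

    std::vector<float> out(logits.size());
    float sum = 0.f;
    for(std::size_t i = 0; i < logits.size(); ++i)
    {
        out[i] = std::exp(beta * (logits[i] - max_val));
        sum += out[i];
    }
    for(std::size_t i = 0; i < logits.size(); ++i)
    {
        // is_log selects log-softmax: log(exp(z_i) / sum) = z_i - log(sum)
        out[i] = is_log ? (beta * (logits[i] - max_val) - std::log(sum)) : (out[i] / sum);
    }
    return out;
}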
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H
-#define ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declaration
-class ITensor;
-
-/** Interface for the space to batch kernel */
-class NESpaceToBatchLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESpaceToBatchLayerKernel";
- }
- /** Default constructor */
- NESpaceToBatchLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerKernel(const NESpaceToBatchLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerKernel &operator=(const NESpaceToBatchLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerKernel(NESpaceToBatchLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerKernel &operator=(NESpaceToBatchLayerKernel &&) = default;
- /** Default destructor */
- ~NESpaceToBatchLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output);
- /** Initialise the kernel's input and output. (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- const ITensor *_block_shape; /**< Block shape tensor */
- const ITensor *_paddings; /**< Paddings tensor */
- ITensor *_output; /**< Destination tensor */
- DataLayout _data_layout; /**< Data layout to be used at run-time */
-
- Size2D _padding_left;
- int _block_shape_x;
- int _block_shape_y;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H */
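// Shape arithmetic behind the static-shape configure()/validate() overloads above, as a sketch:
// the padded spatial extents must divide exactly by the block shape, and the batch dimension
// grows by block_shape_x * block_shape_y. The struct and names are made up for illustration.
#include <cassert>

struct SpaceToBatchOutputShape
{
    int width;
    int height;
    int batches;
};

static SpaceToBatchOutputShape space_to_batch_output_shape(int width, int height, int batches,
                                                           int block_shape_x, int block_shape_y,
                                                           int pad_left_x, int pad_right_x,
                                                           int pad_left_y, int pad_right_y)
{
    const int padded_w = width + pad_left_x + pad_right_x;
    const int padded_h = height + pad_left_y + pad_right_y;
    assert(padded_w % block_shape_x == 0 && padded_h % block_shape_y == 0);
    return { padded_w / block_shape_x, padded_h / block_shape_y, batches * block_shape_x * block_shape_y };
}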
+++ /dev/null
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H
-#define ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the space to depth kernel */
-class NESpaceToDepthLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NESpaceToDepthLayerKernel";
- }
- /** Default constructor */
- NESpaceToDepthLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToDepthLayerKernel(const NESpaceToDepthLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToDepthLayerKernel &operator=(const NESpaceToDepthLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESpaceToDepthLayerKernel(NESpaceToDepthLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESpaceToDepthLayerKernel &operator=(NESpaceToDepthLayerKernel &&) = default;
- /** Default destructor */
- ~NESpaceToDepthLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
- DataLayout _data_layout; /**< Data layout of the operation */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H */
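// Sketch of the shape transform performed by the space-to-depth kernel above: spatial extents
// shrink by block_shape while the channel count grows by block_shape^2. The struct and names
// are made up for illustration.
#include <cassert>

struct SpaceToDepthOutputShape
{
    int width;
    int height;
    int channels;
};

static SpaceToDepthOutputShape space_to_depth_output_shape(int width, int height, int channels, int block_shape)
{
    assert(block_shape > 1 && width % block_shape == 0 && height % block_shape == 0);
    return { width / block_shape, height / block_shape, channels * block_shape * block_shape };
}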
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H
-#define ARM_COMPUTE_NESTACKLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to stack rank-R tensors into one with rank-(R+1) along the axis dimension. */
-class NEStackLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEStackLayerKernel";
- }
- /** Default constructor */
- NEStackLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStackLayerKernel(const NEStackLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStackLayerKernel &operator=(const NEStackLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEStackLayerKernel(NEStackLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEStackLayerKernel &operator=(NEStackLayerKernel &&) = default;
- /** Default destructor */
- ~NEStackLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output);
-
- // Inherited methods overridden
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- unsigned int _axis;
- unsigned int _idx_input;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */
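// Sketch of the output shape produced by stacking, as described above: the input shape is kept
// and a new dimension of extent num_tensors is inserted at position axis. Illustrative only;
// the helper name is made up for the example.
#include <vector>

static std::vector<int> stack_output_shape(const std::vector<int> &input_shape, unsigned int axis, unsigned int num_tensors)
{
    std::vector<int> output_shape = input_shape;
    output_shape.insert(output_shape.begin() + axis, static_cast<int>(num_tensors));
    return output_shape;
}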
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H
-#define ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to perform tensor strided slicing */
-class NEStridedSliceKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEStridedSliceKernel";
- }
- /** Default constructor */
- NEStridedSliceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStridedSliceKernel(const NEStridedSliceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStridedSliceKernel &operator=(const NEStridedSliceKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEStridedSliceKernel(NEStridedSliceKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEStridedSliceKernel &operator=(NEStridedSliceKernel &&) = default;
- /** Default destructor */
- ~NEStridedSliceKernel() = default;
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- * A slice of size 1 starting from starts[i] in the dimension must be preserved.
- */
- void configure(const ITensorInfo *input, ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- * A slice of size 1 starting from starts[i] in the dimension must be preserved.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- Coordinates _starts_abs; /**< Absolute start coordinates */
- Coordinates _final_strides; /**< Final strides */
- int32_t _shrink_mask; /**< Shrink axis mask */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H */
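// Sketch of how the begin/end/shrink_axis masks documented above act on a single dimension:
// a set bit in begin_mask/end_mask widens the range to the full extent, and a set bit in
// shrink_axis_mask keeps exactly one element starting at starts[i]. Negative index wrapping is
// shown; clamping and negative strides are omitted, so treat this as a simplified illustration.
#include <cstdint>

static void resolve_strided_slice_dim(int dim, int extent, int start, int end, int stride,
                                      int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask,
                                      int &out_start, int &out_end, int &out_stride)
{
    out_start  = (begin_mask & (1 << dim)) ? 0 : (start < 0 ? start + extent : start);
    out_end    = (end_mask & (1 << dim)) ? extent : (end < 0 ? end + extent : end);
    out_stride = stride;
    if(shrink_axis_mask & (1 << dim))
    {
        out_end    = out_start + 1; // a slice of size 1 in this dimension
        out_stride = 1;
    }
}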
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETABLELOOKUPKERNEL_H
-#define ARM_COMPUTE_NETABLELOOKUPKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ILut;
-
-/** Interface for the kernel to perform table lookup calculations. */
-class NETableLookupKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NETableLookupKernel";
- }
- /** Default constructor */
- NETableLookupKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NETableLookupKernel(const NETableLookupKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NETableLookupKernel &operator=(const NETableLookupKernel &) = delete;
- /** Allow instances of this class to be moved */
- NETableLookupKernel(NETableLookupKernel &&) = default;
- /** Allow instances of this class to be moved */
- NETableLookupKernel &operator=(NETableLookupKernel &&) = default;
- /** Initialise the kernel's input, lut and output.
- *
- * @param[in] input An input tensor. Data types supported: U8/S16.
- * @param[in] lut The input LUT.
- * @param[out] output The output tensor. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ILut *lut, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Perform table lookup on a given window.
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <class T>
- void tableLookup(const Window &window);
- /** Common signature for all the specialised lut functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window);
- /** Sub function to use for the particular tensor types passed to configure() */
- TableLookupFunction _func;
- const ILut *_lut;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NETABLELOOKUPKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H
-#define ARM_COMPUTE_NETHRESHOLDKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the thresholding kernel */
-class NEThresholdKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEThresholdKernel";
- }
- /** Constructor
- * Initialize all the pointers to nullptr and parameters to zero.
- */
- NEThresholdKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEThresholdKernel(const NEThresholdKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEThresholdKernel &operator=(const NEThresholdKernel &) = delete;
- /** Initialise the kernel's input, output and threshold parameters.
- *
- * @param[in] input An input tensor. Data type supported: U8
- * @param[out] output The output tensor. Data type supported: U8.
- * @param[in] info Threshold kernel descriptor
- */
- void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref NEThresholdKernel
- *
- * @param[in] input Input tensor info. Data type supported: U8
- * @param[in] output Output tensor info. Data type supported: U8
- * @param[in] info Threshold kernel descriptor
- *
- * @return A status containing an error code in case of failure
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** run binary thresholding on the given window */
- void run_binary(const Window &window);
- /** run range thresholding on the given window */
- void run_range(const Window &window);
-
- void (NEThresholdKernel::*_func)(const Window &window);
-
- const ITensor *_input; /**< Input */
- ITensor *_output; /**< Output */
- ThresholdKernelInfo _info; /**< Threshold descriptor */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */
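// Sketch of the two thresholding modes the private run_binary()/run_range() members above refer
// to: binary compares against a single threshold, range keeps values inside [lower, upper].
// The true_value/false_value parameters and helper names are assumptions made for illustration.
#include <cstdint>

static uint8_t threshold_binary(uint8_t value, uint8_t threshold, uint8_t true_value, uint8_t false_value)
{
    return value > threshold ? true_value : false_value;
}

static uint8_t threshold_range(uint8_t value, uint8_t lower, uint8_t upper, uint8_t true_value, uint8_t false_value)
{
    return (value > upper || value < lower) ? false_value : true_value;
}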
+++ /dev/null
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETILEKERNEL_H
-#define ARM_COMPUTE_NETILEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a tile operation */
-class NETileKernel : public INEKernel
-{
-public:
- /** Default constructor */
- NETileKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NETileKernel(const NETileKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NETileKernel &operator=(const NETileKernel &) = delete;
- /** Allow instances of this class to be moved */
- NETileKernel(NETileKernel &&) = default;
- /** Allow instances of this class to be moved */
- NETileKernel &operator=(NETileKernel &&) = default;
- const char *name() const override
- {
- return "NETileKernel";
- }
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: All.
- * @param[out] output Destination tensor. Same as @p input
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- */
- void configure(const ITensor *input, ITensor *output, const Multiples &multiples);
- /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel
- *
- * @param[in] input Source tensor info. Data type supported: All.
- * @param[in] output Destination tensor info. Same as @p input
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETILEKERNEL_H */
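// Sketch of the tile semantics described above for one dimension: the output repeats the input
// `multiple` times along that dimension. Illustrative only; the helper name is made up.
#include <vector>

static std::vector<float> tile_1d_reference(const std::vector<float> &input, unsigned int multiple)
{
    std::vector<float> output;
    output.reserve(input.size() * multiple);
    for(unsigned int m = 0; m < multiple; ++m)
    {
        output.insert(output.end(), input.begin(), input.end());
    }
    return output;
}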
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NETRANSPOSEKERNEL_H
-#define ARM_COMPUTE_NETRANSPOSEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel which transposes the elements of a matrix.
- *
- * [width, height, batch] -> [height, width, batch]
- *
- */
-class NETransposeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NETransposeKernel";
- }
- /** Default constructor */
- NETransposeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NETransposeKernel(const NETransposeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NETransposeKernel &operator=(const NETransposeKernel &) = delete;
- /** Allow instances of this class to be moved */
- NETransposeKernel(NETransposeKernel &&) = default;
- /** Allow instances of this class to be moved */
- NETransposeKernel &operator=(NETransposeKernel &&) = default;
- /** Default destructor */
- ~NETransposeKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: Same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[in] output Output tensor. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the transpose functions
- *
- * @param[in] input An input tensor. Data types supported: All
- * @param[out] output The output tensor. Data type supported: same as @p input
- * @param[in] window Region on which to execute the kernel.
- */
- using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window);
- /** Transpose function to use for the particular tensor types passed to configure() */
- TransposeFunction *_func;
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NETRANSPOSEKERNEL_H */
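// Sketch of the [width, height, batch] -> [height, width, batch] mapping documented above, for a
// single batch stored row-major. Illustrative only; the helper name is made up.
#include <vector>

static std::vector<float> transpose_reference(const std::vector<float> &src, int width, int height)
{
    std::vector<float> dst(src.size());
    for(int y = 0; y < height; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            dst[x * height + y] = src[y * width + x];
        }
    }
    return dst;
}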
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H
-#define ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the Upsample layer kernel.*/
-class NEUpsampleLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEUpsampleLayerKernel";
- }
- /** Default constructor */
- NEUpsampleLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEUpsampleLayerKernel(const NEUpsampleLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEUpsampleLayerKernel &operator=(const NEUpsampleLayerKernel &) = delete;
- /** Default Move Constructor. */
- NEUpsampleLayerKernel(NEUpsampleLayerKernel &&) = default;
- /** Default move assignment operator */
- NEUpsampleLayerKernel &operator=(NEUpsampleLayerKernel &&) = default;
- /** Default destructor */
- ~NEUpsampleLayerKernel() = default;
- /** Set the input output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] policy Defines the policy to fill the intermediate pixels.
- *
- */
- void configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy policy);
- /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] policy Defines the policy to fill the intermediate pixels.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy policy);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Function to run upsample layer (NCHW)
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T, int S>
- void upsample_nchw(const Window &window);
- /** Function to run upsample layer (NHWC)
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T, int S>
- void upsample_nhwc(const Window &window);
-
- using UpsampleFunctionPtr = void (NEUpsampleLayerKernel::*)(const Window &window);
-
-private:
- UpsampleFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- Size2D _info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H */
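// Sketch of a nearest-neighbour upsample for a single NCHW plane, where each input pixel is
// replicated into a stride_x * stride_y block of the output. The interpolation-policy handling
// of the removed kernel is not reproduced; this is an assumption-level illustration only.
#include <cstddef>
#include <vector>

static std::vector<float> upsample_nn_reference(const std::vector<float> &src, int width, int height,
                                                int stride_x, int stride_y)
{
    const int out_w = width * stride_x;
    const int out_h = height * stride_y;
    std::vector<float> dst(static_cast<std::size_t>(out_w) * out_h);
    for(int y = 0; y < out_h; ++y)
    {
        for(int x = 0; x < out_w; ++x)
        {
            dst[y * out_w + x] = src[(y / stride_y) * width + (x / stride_x)];
        }
    }
    return dst;
}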
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEWARPKERNEL_H
-#define ARM_COMPUTE_NEWARPKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <array>
-#include <cstdint>
-namespace arm_compute
-{
-class ITensor;
-
-/** Common interface for warp affine and warp perspective */
-class INEWarpKernel : public INEKernel
-{
-public:
- /** Default constructor */
- INEWarpKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- INEWarpKernel(const INEWarpKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- INEWarpKernel &operator=(const INEWarpKernel &) = delete;
- /** Allow instances of this class to be moved */
- INEWarpKernel(INEWarpKernel &&) = default;
- /** Allow instances of this class to be moved */
- INEWarpKernel &operator=(INEWarpKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data type supported: U8.
- * @param[in] matrix The perspective or affine matrix to use. Must be of type float, 2x3 for affine or 3x3 for perspective.
- * The matrix argument requires 9 values; for the affine case the last 3 values are ignored.
- * @param[in] border_mode Strategy to use for borders
- * @param[in] constant_border_value Constant value used for filling the border.
- */
- virtual void configure(const ITensor *input, ITensor *output, const std::array<float, 9> &matrix, BorderMode border_mode, uint8_t constant_border_value);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-
-protected:
- /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED
- *
- * @param[in] window Region on which to execute the kernel
- */
- virtual void warp_undefined(const Window &window) = 0;
- /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT
- *
- * @param[in] window Region on which to execute the kernel
- */
- virtual void warp_constant(const Window &window) = 0;
- /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE
- *
- * @param[in] window Region on which to execute the kernel
- */
- virtual void warp_replicate(const Window &window) = 0;
- /** Common signature for all the specialised warp functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- void (INEWarpKernel::*_func)(const Window &window);
-
- const ITensor *_input; /**< Input Tensor */
- ITensor *_output; /**< Output Tensor */
- uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */
- std::array<float, 9> _matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. */
-};
-
-/** Template interface for the kernel to compute warp affine
- *
- */
-template <InterpolationPolicy interpolation>
-class NEWarpAffineKernel : public INEWarpKernel
-{
-private:
- const char *name() const override
- {
- return "NEWarpAffineKernel";
- }
- // Inherited methods overridden:
- void warp_undefined(const Window &window) override;
- void warp_constant(const Window &window) override;
- void warp_replicate(const Window &window) override;
-};
-
-/** Template interface for the kernel to compute warp perspective
- *
- */
-template <InterpolationPolicy interpolation>
-class NEWarpPerspectiveKernel : public INEWarpKernel
-{
-private:
- const char *name() const override
- {
- return "NEWarpPerspectiveKernel";
- }
- // Inherited methods overridden:
- void warp_undefined(const Window &window) override;
- void warp_constant(const Window &window) override;
- void warp_replicate(const Window &window) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEWARPKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H
-#define ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer
- *
- * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
- * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication.
- *
- * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have:
- * @f[
- * \left( \begin{array}{ccc}
- * a000 & a001 & a002 \\
- * a010 & a011 & a012 \\
- * a020 & a021 & a022 \\
- * \end{array} \right)
- * \left( \begin{array}{ccc}
- * a100 & a101 & a102 \\
- * a110 & a111 & a112 \\
- * a120 & a121 & a122 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccc}
- * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
- * \end{array} \right)
- * @f]
- */
-class NEWeightsReshapeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEWeightsReshapeKernel";
- }
- /** Constructor.*/
- NEWeightsReshapeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default;
- /** Default destructor */
- ~NEWeightsReshapeKernel() = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
- * Data types supported: All
- * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[out] output The output tensor. Data types supported: Same as @p input
- */
- void configure(const ITensor *input, const ITensor *bias, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel
- *
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
- * Data types supported: All
- * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[in] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the width concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class NEWidthConcatenateLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEWidthConcatenateLayerKernel";
- }
- /** Default constructor */
- NEWidthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWidthConcatenateLayerKernel(const NEWidthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEWidthConcatenateLayerKernel &operator=(const NEWidthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEWidthConcatenateLayerKernel(NEWidthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEWidthConcatenateLayerKernel &operator=(NEWidthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~NEWidthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] width_offset The offset on the X axis.
- * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
- */
- void configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] width_offset The offset on the X axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-private:
- unsigned int _width_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEYOLOLAYERKERNEL_H
-#define ARM_COMPUTE_NEYOLOLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the YOLO layer kernel. */
-class NEYOLOLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEYOLOLayerKernel";
- }
- /** Constructor */
- NEYOLOLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEYOLOLayerKernel(const NEYOLOLayerKernel &) = delete;
- /** Default move constructor */
- NEYOLOLayerKernel(NEYOLOLayerKernel &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEYOLOLayerKernel &operator=(const NEYOLOLayerKernel &) = delete;
- /** Default move assignment operator */
- NEYOLOLayerKernel &operator=(NEYOLOLayerKernel &&) = default;
- /** Default destructor */
- ~NEYOLOLayerKernel() = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer parameters.
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
- */
- void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
- /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Function to run YOLO layer
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename T, int S>
- void yolo_layer_nchw(const Window &window);
- /** Function to run YOLO layer on tensors with NHWC format
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <typename T>
- void yolo_layer_nhwc(const Window &window);
- /** Common signature for all the yolo layer functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using YOLOFunctionPtr = void (NEYOLOLayerKernel::*)(const Window &window);
-
-private:
- YOLOFunctionPtr _func;
- ITensor *_input;
- ITensor *_output;
- ActivationLayerInfo _act_info;
- int32_t _num_classes;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEYOLOLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#pragma once
-
-/* This file is used to configure integration-specific aspects of arm_gemm into ACL */
-
-#include "arm_compute/core/CPP/CPPTypes.h"
-
-namespace arm_gemm
-{
-using CPUModel = arm_compute::CPUModel;
-using CPUInfo = arm_compute::CPUInfo;
-} // namespace arm_compute
-
-
-
/** Align columns */
bool align_columns;
};
+
+/** Internal keypoint class for Lucas-Kanade Optical Flow */
+struct NELKInternalKeypoint
+{
+ float x{ 0.f }; /**< x coordinate of the keypoint */
+ float y{ 0.f }; /**< y coordinate of the keypoint */
+ bool tracking_status{ false }; /**< the tracking status of the keypoint */
+};
+
} // namespace arm_compute
#endif /* ARM_COMPUTE_TYPES_H */
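The NELKInternalKeypoint struct added above is a plain aggregate that the Lucas-Kanade optical-flow code uses to carry per-point tracking state; it is apparently moved into Types.h as part of removing the NEON kernel headers from the public include tree. A minimal, illustrative sketch of populating such a keypoint (the coordinate values are made up for the example, not taken from the library):

    // Illustrative only: seed an internal keypoint before a tracking pass.
    arm_compute::NELKInternalKeypoint kp{};
    kp.x               = 12.5f; // x coordinate of the point being tracked
    kp.y               = 40.0f; // y coordinate of the point being tracked
    kp.tracking_status = true;  // mark the point as currently tracked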
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_UTILS_TRAITS_TRAITS_H
#define ARM_COMPUTE_UTILS_TRAITS_TRAITS_H
+#include "arm_compute/core/Types.h"
#include <type_traits>
namespace arm_compute
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
+#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
+#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
-
#include <cstdint>
#include <memory>
#ifndef ARM_COMPUTE_IOPERATOR_H
#define ARM_COMPUTE_IOPERATOR_H
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/IOperator.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Types.h"
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#define ARM_COMPUTE_ITRANSFORMWEIGHTS_H
#include <atomic>
+#include <utility>
namespace arm_compute
{
};
} // arm_compute
-#endif /*ARM_COMPUTE_ITRANSFORMWEIGHTS_H */
\ No newline at end of file
+#endif /*ARM_COMPUTE_ITRANSFORMWEIGHTS_H */
#define ARM_COMPUTE_INEOPERATOR_H
#include "../../core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/runtime/IOperator.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Types.h"
namespace arm_compute
{
+class ICPPKernel;
+using INEKernel = ICPPKernel;
namespace experimental
{
/** Basic interface for functions which have a single async NEON kernel */
INEOperator &operator=(const INEOperator &) = delete;
/** Default move assignment operator */
INEOperator &operator=(INEOperator &&) = default;
+ /** Default destructor */
+ ~INEOperator();
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
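The run(ITensorPack &tensors) override above is the core of the asynchronous operator interface: an operator is configured once against tensor info objects, and the concrete tensors are only bound at execution time through an ITensorPack. A hedged sketch of the calling pattern, assuming the ITensorPack::add_const_tensor / add_tensor helpers and the ACL_SRC / ACL_DST identifiers from the experimental Types.h header included earlier in this change (those names are assumptions, not confirmed by this diff):

    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/core/experimental/Types.h"
    #include "arm_compute/runtime/NEON/INEOperator.h"

    // Bind concrete tensors to an already-configured async operator and run it.
    // The pack-building helpers and tensor ids below are assumptions, not taken
    // from this patch.
    void run_async(arm_compute::experimental::INEOperator &op,
                   const arm_compute::ITensor             *src,
                   arm_compute::ITensor                    *dst)
    {
        arm_compute::ITensorPack pack;
        pack.add_const_tensor(arm_compute::TensorType::ACL_SRC, src); // operator input
        pack.add_tensor(arm_compute::TensorType::ACL_DST, dst);       // operator output
        op.run(pack); // the kernel fetches its operands from the pack at run time
    }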
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_INESIMPLEFUNCTION_H
#define ARM_COMPUTE_INESIMPLEFUNCTION_H
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include <memory>
namespace arm_compute
{
+class ICPPKernel;
+class NEFillBorderKernel;
+using INEKernel = ICPPKernel;
/** Basic interface for functions which have a single NEON kernel */
class INESimpleFunction : public IFunction
{
public:
/** Constructor */
INESimpleFunction();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INESimpleFunction(const INESimpleFunction &) = delete;
+ /** Default move constructor */
+ INESimpleFunction(INESimpleFunction &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INESimpleFunction &operator=(const INESimpleFunction &) = delete;
+ /** Default move assignment operator */
+ INESimpleFunction &operator=(INESimpleFunction &&) = default;
+ /** Default destructor */
+ ~INESimpleFunction();
// Inherited methods overridden:
void run() override final;
protected:
- std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */
- NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */
+ std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */
+ std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
};
}
#endif /*ARM_COMPUTE_INESIMPLEFUNCTION_H */
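The INESimpleFunction hunk above is the template for most of the remaining changes in this patch: the concrete kernel headers disappear from the public function headers, the kernel types are only forward-declared, the kernels are owned through std::unique_ptr, and a destructor is declared so that it can be defined in the .cpp file where the kernel types are complete. A stripped-down sketch of that idiom using hypothetical names (not library code):

    #include <memory>

    class HypotheticalKernel; // a forward declaration is all the header needs

    class HypotheticalFunction
    {
    public:
        HypotheticalFunction();
        // Declared here but defined in the .cpp file, where HypotheticalKernel is a
        // complete type, so std::unique_ptr can instantiate its deleter there.
        ~HypotheticalFunction();

    private:
        std::unique_ptr<HypotheticalKernel> _kernel; // owned kernel, type hidden from includers
    };

Keeping the destructor out of line is what makes the forward declaration sufficient: if the compiler had to generate the destructor in the header, std::unique_ptr<HypotheticalKernel> would require a complete type at every include site.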
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H
#define ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IRuntimeContext.h"
namespace arm_compute
{
+class ICPPKernel;
+using INEKernel = ICPPKernel;
/** Basic interface for functions which have a single NEON kernel and no border */
class INESimpleFunctionNoBorder : public IFunction
{
INESimpleFunctionNoBorder &operator=(const INESimpleFunctionNoBorder &) = delete;
/** Default move assignment operator */
INESimpleFunctionNoBorder &operator=(INESimpleFunctionNoBorder &&) = default;
+ /** Default destructor */
+ ~INESimpleFunctionNoBorder();
// Inherited methods overridden:
void run() override final;
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H
#define ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
* @note The image data type for the inputs must be U8 or S16
 * @note The function also calculates the absolute difference when the two inputs have different image data types
*/
-class NEAbsoluteDifference : public INESimpleFunction
+class NEAbsoluteDifference : public INESimpleFunctionNoBorder
{
public:
+ /** Default constructor */
+ NEAbsoluteDifference() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAbsoluteDifference(const NEAbsoluteDifference &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAbsoluteDifference &operator=(const NEAbsoluteDifference &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAbsoluteDifference(NEAbsoluteDifference &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAbsoluteDifference &operator=(NEAbsoluteDifference &&) = delete;
+ /** Default destructor */
+ ~NEAbsoluteDifference();
/** Set the inputs and output images
*
* @param[in] input1 Source tensor. Data types supported: U8/S16.
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEAccumulate : public INESimpleFunctionNoBorder
{
public:
+ /** Default constructor */
+ NEAccumulate() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulate(const NEAccumulate &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulate &operator=(const NEAccumulate &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAccumulate(NEAccumulate &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAccumulate &operator=(NEAccumulate &&) = delete;
+ /** Default destructor */
+ ~NEAccumulate();
/** Set the input and accumulation tensors
*
* @param[in] input Source tensor. Data type supported: U8.
class NEAccumulateWeighted : public INESimpleFunctionNoBorder
{
public:
+ /** Default constructor */
+ NEAccumulateWeighted() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateWeighted(const NEAccumulateWeighted &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateWeighted &operator=(const NEAccumulateWeighted &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAccumulateWeighted(NEAccumulateWeighted &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAccumulateWeighted &operator=(NEAccumulateWeighted &&) = delete;
+ /** Default destructor */
+ ~NEAccumulateWeighted();
/** Set the input and accumulation tensors, and the scale value
*
* @param[in] input Source tensor. Data type supported: U8.
class NEAccumulateSquared : public INESimpleFunctionNoBorder
{
public:
+ /** Default constructor */
+ NEAccumulateSquared() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateSquared(const NEAccumulateSquared &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateSquared &operator=(const NEAccumulateSquared &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAccumulateSquared(NEAccumulateSquared &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEAccumulateSquared &operator=(NEAccumulateSquared &&) = delete;
+ /** Default destructor */
+ ~NEAccumulateSquared();
/** Set the input and accumulation tensors and the shift value.
*
* @param[in] input Source tensor. Data type supported: U8.
* @param[in] ctx Runtime context to be used by the function
*/
NEActivationLayer(IRuntimeContext *ctx = nullptr);
- /** Destructor */
- ~NEActivationLayer();
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEActivationLayer(const NEActivationLayer &) = delete;
/** Default move constructor */
NEActivationLayer &operator=(const NEActivationLayer &) = delete;
/** Default move assignment operator */
NEActivationLayer &operator=(NEActivationLayer &&);
+ /** Destructor */
+ ~NEActivationLayer();
/** [NEActivationLayer snippet] **/
/** Set the input and output tensor.
*
class NEActivationLayer : public INEOperator
{
public:
+ /** Constructor */
+ NEActivationLayer() = default;
+ /** Prevent instances of this class from being copied */
+ NEActivationLayer(const NEActivationLayer &) = delete;
+ /** Default move constructor */
+ NEActivationLayer(NEActivationLayer &&) = default;
+ /** Prevent instances of this class from being copied */
+ NEActivationLayer &operator=(const NEActivationLayer &) = delete;
+ /** Default move assignment operator */
+ NEActivationLayer &operator=(NEActivationLayer &&) = default;
+ /** Destructor */
+ ~NEActivationLayer();
+
/** Set the input and output tensor.
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
public:
/** Constructor */
NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArgMinMaxLayer(const NEArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArgMinMaxLayer &operator=(const NEArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEArgMinMaxLayer(NEArgMinMaxLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEArgMinMaxLayer &operator=(NEArgMinMaxLayer &&) = delete;
+ /** Default destructor */
+ ~NEArgMinMaxLayer();
/** Set the input and output tensors.
*
* @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32.
class NEArithmeticAddition : public INEOperator
{
public:
+ /** Constructor */
+ NEArithmeticAddition() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticAddition(const NEArithmeticAddition &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEArithmeticAddition(NEArithmeticAddition &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEArithmeticAddition &operator=(NEArithmeticAddition &&) = delete;
+ /** Default destructor */
+ ~NEArithmeticAddition();
/** Initialise the kernel's inputs, output and conversion policy.
*
* Valid configurations (Input1,Input2) -> Output :
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H
#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H
-#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include <memory>
+
namespace arm_compute
{
class ITensor;
+class NEBatchNormalizationLayerKernel;
/** Basic function to run @ref NENormalizationLayerKernel and simulate a batch normalization layer.
*
class NEBatchNormalizationLayer : public IFunction
{
public:
- /** Default constructor */
+ /** Constructor */
NEBatchNormalizationLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchNormalizationLayer(const NEBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchNormalizationLayer &operator=(const NEBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBatchNormalizationLayer(NEBatchNormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBatchNormalizationLayer &operator=(NEBatchNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~NEBatchNormalizationLayer();
/** Set the input and output tensors.
*
* @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
void run() override;
private:
- NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */
+ std::unique_ptr<NEBatchNormalizationLayerKernel> _norm_kernel; /**< Batch normalization layer kernel */
};
}
#endif /* ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H */
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEBatchToSpaceLayerKernel. */
class NEBatchToSpaceLayer : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEBatchToSpaceLayer() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchToSpaceLayer(const NEBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchToSpaceLayer &operator=(const NEBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBatchToSpaceLayer(NEBatchToSpaceLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBatchToSpaceLayer &operator=(NEBatchToSpaceLayer &&) = delete;
+ /** Default destructor */
+ ~NEBatchToSpaceLayer() = default;
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEBitwiseAnd : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEBitwiseAnd() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseAnd(const NEBitwiseAnd &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseAnd &operator=(const NEBitwiseAnd &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBitwiseAnd(NEBitwiseAnd &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEBitwiseAnd &operator=(NEBitwiseAnd &&) = delete;
+ /** Default destructor */
+ ~NEBitwiseAnd() = default;
/** Initialise the kernel's inputs and output
*
* @param[in] input1 First tensor input. Data type supported: U8.
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H
#define ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H
-#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEBoundingBoxTransformKernel.
*
* This function calls the following Neon kernels:
* -# @ref NEBoundingBoxTransformKernel
*/
-class NEBoundingBoxTransform : public INESimpleFunction
+class NEBoundingBoxTransform : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NECANNYEDGE_H
#define ARM_COMPUTE_NECANNYEDGE_H
-#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
class ITensor;
+class NEGradientKernel;
+class NEFillBorderKernel;
+class NEEdgeNonMaxSuppressionKernel;
+class NEEdgeTraceKernel;
/** Basic function to execute Canny edge detection on NEON. This function calls the following NEON kernels and functions:
*
NECannyEdge(const NECannyEdge &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NECannyEdge &operator=(const NECannyEdge &) = delete;
+ /** Default destructor */
+ ~NECannyEdge();
/** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
*
* @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel */
- std::unique_ptr<INEKernel> _gradient; /**< Gradient kernel */
- NEEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel */
- NEEdgeTraceKernel _edge_trace; /**< Edge tracing kernel */
- NEFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
- NEFillBorderKernel _border_edge_trace; /**< Fill border before edge trace */
- Tensor _gx; /**< Source tensor - Gx component */
- Tensor _gy; /**< Source tensor - Gy component */
- Tensor _magnitude; /**< Source tensor - Magnitude */
- Tensor _phase; /**< Source tensor - Phase */
- Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */
- ITensor *_output; /**< Output tensor provided by the user. */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel */
+ std::unique_ptr<NEGradientKernel> _gradient; /**< Gradient kernel */
+ std::unique_ptr<NEEdgeNonMaxSuppressionKernel> _non_max_suppr; /**< Non-Maxima suppression kernel */
+ std::unique_ptr<NEEdgeTraceKernel> _edge_trace; /**< Edge tracing kernel */
+ std::unique_ptr<NEFillBorderKernel> _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
+ std::unique_ptr<NEFillBorderKernel> _border_edge_trace; /**< Fill border before edge trace */
+ Tensor _gx; /**< Source tensor - Gx component */
+ Tensor _gy; /**< Source tensor - Gy component */
+ Tensor _magnitude; /**< Source tensor - Magnitude */
+ Tensor _phase; /**< Source tensor - Phase */
+ Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */
+ ITensor *_output; /**< Output tensor provided by the user. */
};
}
#endif /* ARM_COMPUTE_NECANNYEDGE_H */
#define ARM_COMPUTE_NECAST_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEDepthConvertLayerKernel.
 * This function ignores the scale and zeroPoint of quantized tensors, so QASYMM8 input is treated as uint8 values.
*/
-class NECast : public INESimpleFunction
+class NECast : public INESimpleFunctionNoBorder
{
public:
/** Initialize the function's source, destination
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYER_H
#define ARM_COMPUTE_NECHANNELSHUFFLELAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEChannelShuffleLayerKernel
*
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Types.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NECol2Im */
class NECol2Im : public INESimpleFunctionNoBorder
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NECOMPUTEALLANCHORS_H
#define ARM_COMPUTE_NECOMPUTEALLANCHORS_H
-#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEComputeAllAnchorsKernel.
*
* This function calls the following NEON kernels:
* -# @ref NEComputeAllAnchorsKernel
*/
-class NEComputeAllAnchors : public INESimpleFunction
+class NEComputeAllAnchors : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INEOperator.h"
#include "support/Requires.h"
class NEConcatenation : public INEOperator
{
public:
- /** Default constructor */
+ /** Constructor */
NEConcatenation();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConcatenation(const NEConcatenation &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConcatenation &operator=(const NEConcatenation &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConcatenation(NEConcatenation &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConcatenation &operator=(NEConcatenation &&) = delete;
+ /** Default destructor */
+ ~NEConcatenation() = default;
/** Initialise the kernel's inputs vector and output.
*
 * @note Input and output tensor dimension preconditions differ depending on the concatenation axis.
void run(ITensorPack &tensors) override;
private:
- std::vector<std::unique_ptr<INEKernel>> _concat_kernels;
- unsigned int _num_inputs;
- unsigned int _axis;
+ std::vector<std::unique_ptr<ICPPKernel>> _concat_kernels;
+ unsigned int _num_inputs;
+ unsigned int _axis;
};
} // namespace experimental
} // namespace arm_compute
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H
#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H
-#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/ITransformWeights.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class NEConvertFullyConnectedWeightsKernel;
/** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */
class NEConvertFullyConnectedWeights : public IFunction
public:
/** Default constructor */
NEConvertFullyConnectedWeights();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvertFullyConnectedWeights(const NEConvertFullyConnectedWeights &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvertFullyConnectedWeights &operator=(const NEConvertFullyConnectedWeights &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvertFullyConnectedWeights(NEConvertFullyConnectedWeights &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvertFullyConnectedWeights &operator=(NEConvertFullyConnectedWeights &&) = delete;
+ /** Default destructor */
+ ~NEConvertFullyConnectedWeights();
/** Initialize the function.
*
* @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
void run() override;
private:
- NEConvertFullyConnectedWeightsKernel _kernel;
+ std::unique_ptr<NEConvertFullyConnectedWeightsKernel> _kernel;
};
namespace weights_transformations
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NECONVOLUTION_H
#define ARM_COMPUTE_NECONVOLUTION_H
-#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
class ITensor;
+class NEFillBorderKernel;
+template <unsigned int matrix_size>
+class NEConvolutionKernel;
+template <unsigned int matrix_size>
+class NESeparableConvolutionHorKernel;
+template <unsigned int matrix_size>
+class NESeparableConvolutionVertKernel;
/** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels:
*
class NEConvolution3x3 : public INESimpleFunction
{
public:
+ /** Constructor */
+ NEConvolution3x3() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolution3x3(const NEConvolution3x3 &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolution3x3 &operator=(const NEConvolution3x3 &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolution3x3(NEConvolution3x3 &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolution3x3 &operator=(NEConvolution3x3 &&) = delete;
+ /** Default destructor */
+ ~NEConvolution3x3();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
public:
/** Default constructor */
NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionSquare(const NEConvolutionSquare &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionSquare &operator=(const NEConvolutionSquare &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolutionSquare(NEConvolutionSquare &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolutionSquare &operator=(NEConvolutionSquare &&) = delete;
+ /** Default destructor */
+ ~NEConvolutionSquare();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
void run() override;
private:
- MemoryGroup _memory_group; /**< Function memory group */
- Tensor _tmp; /**< temporary buffer for output of horizontal pass */
- bool _is_separable; /**< true if the convolution can be separated */
- NESeparableConvolutionHorKernel<matrix_size> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
- NESeparableConvolutionVertKernel<matrix_size> _kernel_vert; /**< kernel for vertical pass of separated convolution */
- NEConvolutionKernel<matrix_size> _kernel; /**< kernel for non-separated convolution **/
- NEFillBorderKernel _border_handler; /**< kernel for border handling */
+ MemoryGroup _memory_group; /**< Function memory group */
+ Tensor _tmp; /**< temporary buffer for output of horizontal pass */
+ bool _is_separable; /**< true if the convolution can be separated */
+ std::unique_ptr<NESeparableConvolutionHorKernel<matrix_size>> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
+ std::unique_ptr<NESeparableConvolutionVertKernel<matrix_size>> _kernel_vert; /**< kernel for vertical pass of separated convolution */
+ std::unique_ptr<NEConvolutionKernel<matrix_size>> _kernel; /**< kernel for non-separated convolution **/
+ std::unique_ptr<NEFillBorderKernel> _border_handler; /**< kernel for border handling */
};
/** Basic function to run 5x5 convolution. */
class NEConvolutionRectangle : public INESimpleFunction
{
public:
+ /** Constructor */
+ NEConvolutionRectangle() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionRectangle(const NEConvolutionRectangle &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionRectangle &operator=(const NEConvolutionRectangle &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolutionRectangle(NEConvolutionRectangle &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolutionRectangle &operator=(NEConvolutionRectangle &&) = delete;
+ /** Default destructor */
+ ~NEConvolutionRectangle();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
public:
/** Constructor */
NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionLayer(const NEConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionLayer &operator=(const NEConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolutionLayer(NEConvolutionLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolutionLayer &operator=(NEConvolutionLayer &&) = delete;
+ /** Default destructor */
+ ~NEConvolutionLayer() = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NECopyKernel */
class NECopy : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NECopy() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECopy(const NECopy &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECopy &operator=(const NECopy &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NECopy(NECopy &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NECopy &operator=(NECopy &&) = delete;
+ /** Default destructor */
+ ~NECopy();
/** Initialise the function's source and destination.
*
* @param[in] input Source tensor. Data types supported: All
#ifndef ARM_COMPUTE_NEON_CROP_RESIZE_H
#define ARM_COMPUTE_NEON_CROP_RESIZE_H
-#include "arm_compute/core/NEON/kernels/NECropKernel.h"
#include "arm_compute/runtime/NEON/functions/NEScale.h"
#include <memory>
{
// Forward Declarations
class ITensor;
+class NECropKernel;
/** Function to perform cropping and resizing */
class NECropResize : public IFunction
/** Allow instances of this class to be moved */
NECropResize &operator=(NECropResize &&) = default;
/** Default destructor */
- virtual ~NECropResize() = default;
+ ~NECropResize();
/** Configure kernel
*
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**Basic function to run @ref NEDepthConvertLayerKernel */
class NEDepthConvertLayer : public INESimpleFunctionNoBorder
NEDepthConvertLayer(const NEDepthConvertLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers)*/
const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete;
+ /** Default destructor */
+ ~NEDepthConvertLayer() = default;
/** Initialize the function's source, destination
*
* Valid conversions Input -> Output :
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEDepthToSpaceLayerKernel. */
class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEDepthToSpaceLayer() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthToSpaceLayer(const NEDepthToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthToSpaceLayer &operator=(const NEDepthToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDepthToSpaceLayer(NEDepthToSpaceLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDepthToSpaceLayer &operator=(NEDepthToSpaceLayer &&) = delete;
+ /** Default destructor */
+ ~NEDepthToSpaceLayer() = default;
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All
#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class NEDepthwiseConvolutionLayerNativeKernel;
/** Function to execute a depthwise convolution.
*/
NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
/** Default move assignment operator */
NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~NEDepthwiseConvolutionLayer();
/** Initialize the function's source, destination, weights and convolution information.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
/** Default move assignment operator */
NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
+ /** Default destructor */
+ ~NEDepthwiseConvolutionLayerOptimizedInternal() = default;
/** Initialize the function's source, destination, kernels and border_size.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
- NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- NEFillBorderKernel _border_handler;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- const ITensor *_original_weights;
- bool _has_bias;
- bool _is_quantized;
- bool _is_nchw;
- bool _permute;
- bool _is_activationlayer_enabled;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
+ NEPermute _permute_input;
+ NEPermute _permute_weights;
+ NEPermute _permute_output;
+ NEActivationLayer _activationlayer_function;
+ Tensor _accumulator;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
+ const ITensor *_original_weights;
+ bool _has_bias;
+ bool _is_quantized;
+ bool _is_nchw;
+ bool _permute;
+ bool _is_activationlayer_enabled;
+ bool _is_prepared;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
/** Default move assignment operator */
NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
+ /** Default destructor */
+ ~NEDepthwiseConvolutionLayerGeneric() = default;
/** Initialize the function's source, destination, weights and convolution information.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
void prepare() override;
private:
- NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_prepared;
- bool _is_nchw;
- bool _is_activationlayer_enabled;
- const ITensor *_original_weights;
+ std::unique_ptr<NEDepthwiseConvolutionLayerNativeKernel> _depthwise_conv_kernel;
+ NEPermute _permute_input;
+ NEPermute _permute_weights;
+ NEPermute _permute_output;
+ NEActivationLayer _activationlayer_function;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
+ bool _is_prepared;
+ bool _is_nchw;
+ bool _is_activationlayer_enabled;
+ const ITensor *_original_weights;
};
DepthwiseConvolutionFunction _depth_conv_func;
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEDequantizationLayerKernel that dequantizes an input tensor */
class NEDequantizationLayer : public INESimpleFunctionNoBorder
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEDERIVATIVE_H
#define ARM_COMPUTE_NEDERIVATIVE_H
-#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include <cstdint>
+#include <memory>
namespace arm_compute
{
class ITensor;
+class NEDerivativeKernel;
+class NEFillBorderKernel;
/** Basic function to execute first order derivative operator. This function calls the following NEON kernels:
*
public:
/** Default constructor */
NEDerivative();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDerivative(const NEDerivative &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDerivative &operator=(const NEDerivative &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDerivative(NEDerivative &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDerivative &operator=(NEDerivative &&) = delete;
+ /** Default destructor */
+ ~NEDerivative();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
void run() override;
private:
- NEDerivativeKernel _kernel; /**< Derivative kernel */
- NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */
+ std::unique_ptr<NEDerivativeKernel> _kernel; /**< Derivative kernel */
+ std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */
};
}
#endif /* ARM_COMPUTE_NEDERIVATIVE_H */
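The hunks above, and most of the ones that follow, apply one recurring pattern: the kernel header is dropped from the public runtime header, the kernel type is only forward-declared, the function owns it through std::unique_ptr, copy/move are deleted where required, and the destructor is declared in the header but defined in the source file where the kernel type is complete. Below is a minimal sketch of that pattern using hypothetical names (MyFunction and MyKernel are placeholders, not library types).

// MyFunction.h -- sketch of the pattern applied throughout this patch (hypothetical names)
#include <memory>

namespace example
{
class MyKernel; // forward declaration only; the kernel header stays out of the public interface

class MyFunction
{
public:
    MyFunction();
    ~MyFunction(); // declared here, defined where MyKernel is a complete type
    MyFunction(const MyFunction &) = delete;            // a unique_ptr member cannot be copied
    MyFunction &operator=(const MyFunction &) = delete;

private:
    std::unique_ptr<MyKernel> _kernel;
};
} // namespace example

// MyFunction.cpp -- the only translation unit that needs the full kernel definition
// #include "MyKernel.h"
// example::MyFunction::MyFunction() : _kernel(std::make_unique<MyKernel>()) {}
// example::MyFunction::~MyFunction() = default; // defining it here keeps MyKernel's definition out of client code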
NEDetectionPostProcessLayer(const NEDetectionPostProcessLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEDetectionPostProcessLayer &operator=(const NEDetectionPostProcessLayer &) = delete;
+ /** Default destructor */
+ ~NEDetectionPostProcessLayer() = default;
/** Configure the detection output layer NE function
*
* @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32.
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
+class NEDirectConvolutionLayerOutputStageKernel;
+class NEDirectConvolutionLayerKernel;
+class NEFillBorderKernel;
+
/** Function to run the direct convolution.
*
* This function calls the following NEON kernels:
public:
/** Constructor */
NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayer(const NEDirectConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayer &operator=(const NEDirectConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDirectConvolutionLayer(NEDirectConvolutionLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEDirectConvolutionLayer &operator=(NEDirectConvolutionLayer &&) = delete;
+ /** Default destructor */
+ ~NEDirectConvolutionLayer();
/** Set the input, weights, biases and output tensors.
*
* @note: DirectConvolution only works in the following configurations:
void run() override;
private:
- MemoryGroup _memory_group;
- NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- NEDirectConvolutionLayerKernel _conv_kernel;
- NEFillBorderKernel _input_border_handler;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- bool _has_bias;
- bool _is_activationlayer_enabled;
- unsigned int _dim_split;
- bool _is_padding_required;
+ MemoryGroup _memory_group;
+ std::unique_ptr<NEDirectConvolutionLayerOutputStageKernel> _output_stage_kernel;
+ std::unique_ptr<NEDirectConvolutionLayerKernel> _conv_kernel;
+ std::unique_ptr<NEFillBorderKernel> _input_border_handler;
+ NEActivationLayer _activationlayer_function;
+ Tensor _accumulator;
+ bool _has_bias;
+ bool _is_activationlayer_enabled;
+ unsigned int _dim_split;
+ bool _is_padding_required;
};
}
#endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H */
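For context, a minimal usage sketch of the function declared above. It assumes the documented configure() overload taking input, weights, bias, output and a PadStrideInfo; the shapes are illustrative only and the tensors would need real data before running.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

void direct_convolution_example()
{
    using namespace arm_compute;

    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));       // W x H x IFM input
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 8U), 1, DataType::F32)); // 3x3 kernels, 8 OFM
    bias.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(30U, 30U, 8U), 1, DataType::F32));       // no padding, unit stride

    NEDirectConvolutionLayer conv;
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 0, 0));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    conv.run(); // fill the tensors with real data before running in practice
}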
#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H
#define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to perform inverse square root on an input tensor. */
class NERsqrtLayer : public INESimpleFunctionNoBorder
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H
#define ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H
-#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
-#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
#include "arm_compute/runtime/Distribution1D.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/Lut.h"
namespace arm_compute
{
class ITensor;
+class NEHistogramKernel;
+class NECumulativeDistributionKernel;
+class NETableLookupKernel;
using IImage = ITensor;
/** Basic function to execute histogram equalization. This function calls the following NEON kernels:
public:
/** Default Constructor. */
NEEqualizeHistogram();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEEqualizeHistogram(const NEEqualizeHistogram &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEEqualizeHistogram &operator=(const NEEqualizeHistogram &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEEqualizeHistogram(NEEqualizeHistogram &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEEqualizeHistogram &operator=(NEEqualizeHistogram &&) = delete;
+ /** Default destructor */
+ ~NEEqualizeHistogram();
/** Initialise the kernel's inputs.
*
* @note Currently the width of the input image must be a multiple of 16.
void run() override;
private:
- NEHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */
- NECumulativeDistributionKernel _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution
+ std::unique_ptr<NEHistogramKernel> _histogram_kernel; /**< Kernel that calculates the histogram of input. */
+ std::unique_ptr<NECumulativeDistributionKernel> _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution
and creates the relevant LookupTable. */
- NETableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
- Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */
- Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */
- Lut _cd_lut; /**< Holds the equalization lookuptable. */
- static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */
- static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */
+ std::unique_ptr<NETableLookupKernel> _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
+ Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */
+ Distribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */
+ Lut _cd_lut; /**< Holds the equalization lookuptable. */
+ static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */
+ static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */
};
}
#endif /*ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H */
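A short usage sketch for the histogram-equalization function above, assuming the documented configure(input, output) overload; per the note in the header, the U8 input width is kept a multiple of 16.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h"
#include "arm_compute/runtime/Tensor.h"

void equalize_histogram_example()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8)); // width is a multiple of 16
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

    NEEqualizeHistogram eq_hist;
    eq_hist.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    eq_hist.run();
}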
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEFFT1D_H
#define ARM_COMPUTE_NEFFT1D_H
-#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
+
namespace arm_compute
{
// Forward declaration
class ITensor;
+class NEFFTDigitReverseKernel;
+class NEFFTRadixStageKernel;
+class NEFFTScaleKernel;
/** Basic function to execute one dimensional FFT. This function calls the following NEON kernels:
*
public:
/** Default Constructor */
NEFFT1D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFT1D(const NEFFT1D &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFT1D &operator=(const NEFFT1D &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFT1D(NEFFT1D &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFT1D &operator=(NEFFT1D &&) = delete;
+ /** Default destructor */
+ ~NEFFT1D();
/** Initialise the function's source and destinations.
*
* @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
void run() override;
protected:
- MemoryGroup _memory_group;
- NEFFTDigitReverseKernel _digit_reverse_kernel;
- std::vector<NEFFTRadixStageKernel> _fft_kernels;
- NEFFTScaleKernel _scale_kernel;
- Tensor _digit_reversed_input;
- Tensor _digit_reverse_indices;
- unsigned int _num_ffts;
- unsigned int _axis;
- bool _run_scale;
+ MemoryGroup _memory_group;
+ std::unique_ptr<NEFFTDigitReverseKernel> _digit_reverse_kernel;
+ std::vector<std::unique_ptr<NEFFTRadixStageKernel>> _fft_kernels;
+ std::unique_ptr<NEFFTScaleKernel> _scale_kernel;
+ Tensor _digit_reversed_input;
+ Tensor _digit_reverse_indices;
+ unsigned int _num_ffts;
+ unsigned int _axis;
+ bool _run_scale;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEFFT1D_H */
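A minimal usage sketch for NEFFT1D, assuming the configure(input, output, info) overload and the FFT1DInfo descriptor from FunctionDescriptors.h (axis field, direction defaulting to a forward transform); the complex 1-D length of 128 is illustrative.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/NEON/functions/NEFFT1D.h"
#include "arm_compute/runtime/Tensor.h"

void fft1d_example()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U), 2, DataType::F32)); // complex input: 2 channels
    dst.allocator()->init(TensorInfo(TensorShape(128U), 2, DataType::F32)); // complex output: 2 channels

    FFT1DInfo fft_info;
    fft_info.axis = 0; // transform along the first dimension

    NEFFT1D fft;
    fft.configure(&src, &dst, fft_info);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    fft.run();
}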
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
public:
/** Default Constructor */
NEFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFT2D(const NEFFT2D &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFT2D &operator=(const NEFFT2D &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFT2D(NEFFT2D &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFFT2D &operator=(NEFFT2D &&) = delete;
+ /** Default destructor */
+ ~NEFFT2D();
/** Initialise the function's source and destinations
*
* @param[in] input Source tensor. Data types supported: F32.
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
NEFFTConvolutionLayer &operator=(const NEFFTConvolutionLayer &) = delete;
/** Default move assignment operator */
NEFFTConvolutionLayer &operator=(NEFFTConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~NEFFTConvolutionLayer();
/** Set the input and output tensors.
*
     * @note: This function only works with square kernel sizes and unit strides for both NCHW and NHWC data layouts
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEFASTCORNERS_H
#define ARM_COMPUTE_NEFASTCORNERS_H
-#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
class ITensor;
+class NENonMaximaSuppression3x3Kernel;
+class NEFastCornersKernel;
+class NEFillBorderKernel;
+class NEFillArrayKernel;
using IImage = ITensor;
/** Basic function to execute fast corners. This function calls the following NEON kernels:
public:
/** Constructor */
NEFastCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFastCorners(const NEFastCorners &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFastCorners &operator=(const NEFastCorners &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFastCorners(NEFastCorners &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFastCorners &operator=(NEFastCorners &&) = delete;
+ /** Default destructor */
+ ~NEFastCorners();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
void run() override;
private:
- MemoryGroup _memory_group;
- NEFastCornersKernel _fast_corners_kernel;
- NEFillBorderKernel _border_handler;
- NENonMaximaSuppression3x3Kernel _nonmax_kernel;
- NEFillArrayKernel _fill_kernel;
- Image _output;
- Image _suppressed;
- bool _non_max;
+ MemoryGroup _memory_group;
+ std::unique_ptr<NEFastCornersKernel> _fast_corners_kernel;
+ std::unique_ptr<NEFillBorderKernel> _border_handler;
+ std::unique_ptr<NENonMaximaSuppression3x3Kernel> _nonmax_kernel;
+ std::unique_ptr<NEFillArrayKernel> _fill_kernel;
+ Image _output;
+ Image _suppressed;
+ bool _non_max;
};
}
#endif /*ARM_COMPUTE_NEFASTCORNERS_H */
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEFILL_H
#define ARM_COMPUTE_NEFILL_H
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEFILLBORDER_H
#define ARM_COMPUTE_NEFILLBORDER_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
namespace arm_compute
{
// Forward declaration
class ITensor;
+class NEFillBorderKernel;
/** Basic function to run @ref NEFillBorderKernel */
class NEFillBorder : public IFunction
void run() override;
private:
- NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */
+ std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEFILLBORDER_H */
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to execute flatten layer kernel. */
class NEFlattenLayer : public INESimpleFunctionNoBorder
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEFloorKernel */
class NEFloor : public INESimpleFunctionNoBorder
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
+#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
+class NEFlattenLayerKernel;
+
/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels:
- *
- * -# @ref NETransposeKernel
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEFullyConnectedLayerReshapeWeights() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFullyConnectedLayerReshapeWeights(const NEFullyConnectedLayerReshapeWeights &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFullyConnectedLayerReshapeWeights &operator=(const NEFullyConnectedLayerReshapeWeights &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFullyConnectedLayerReshapeWeights(NEFullyConnectedLayerReshapeWeights &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEFullyConnectedLayerReshapeWeights &operator=(NEFullyConnectedLayerReshapeWeights &&) = delete;
+ /** Default destructor */
+ ~NEFullyConnectedLayerReshapeWeights() = default;
/** Set the input and output tensors.
*
* @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
NEFullyConnectedLayer &operator=(const NEFullyConnectedLayer &) = delete;
/** Default move assignment operator */
NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default;
+ /** Default destructor */
+ ~NEFullyConnectedLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
MemoryGroup _memory_group;
IWeightsManager *_weights_manager;
- NEFlattenLayerKernel _flatten_kernel;
+ std::unique_ptr<NEFlattenLayerKernel> _flatten_kernel;
NEConvertFullyConnectedWeights _convert_weights;
weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed;
NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
{
// Forward declarations
class ITensor;
+class NEFuseBatchNormalizationKernel;
/** Basic function to fuse the batch normalization node to a preceding convolution node */
class NEFuseBatchNormalization : public IFunction
/** Allow instances of this class to be moved */
NEFuseBatchNormalization &operator=(NEFuseBatchNormalization &&) = default;
/** Default destructor */
- ~NEFuseBatchNormalization() = default;
+ ~NEFuseBatchNormalization();
/** Set the input and output tensors.
*
* @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
void run() override;
private:
- NEFuseBatchNormalizationKernel _fuse_bn_kernel;
+ std::unique_ptr<NEFuseBatchNormalizationKernel> _fuse_bn_kernel;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H */
#ifndef ARM_COMPUTE_NEGEMM_H
#define ARM_COMPUTE_NEGEMM_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
+
namespace arm_compute
{
+class NEGEMMInterleave4x4Kernel;
+class NEGEMMMatrixAdditionKernel;
+class NEGEMMMatrixMultiplyKernel;
+class NEGEMMTranspose1xWKernel;
/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels:
*
* If optimized assembly is available:
NEGEMM &operator=(const NEGEMM &) = delete;
/** Default move assignment operator */
NEGEMM &operator=(NEGEMM &&) = default;
+ /** Default destructor */
+ ~NEGEMM();
/** Initialise the kernel's inputs, output
*
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEGEMMInterleave4x4Kernel _interleave_kernel;
- NEGEMMTranspose1xWKernel _transpose_kernel;
- NEGEMMMatrixMultiplyKernel _mm_kernel;
- NEGEMMAssemblyDispatch _asm_glue;
- NEGEMMMatrixAdditionKernel _ma_kernel;
- NEActivationLayer _alpha_scale_func;
- NEArithmeticAddition _add_bias;
- NEActivationLayer _activation_func;
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel;
+ std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel;
+ std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel;
+ NEGEMMAssemblyDispatch _asm_glue;
+ std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel;
+ NEActivationLayer _alpha_scale_func;
+ NEArithmeticAddition _add_bias;
+ NEActivationLayer _activation_func;
Tensor _tmp_a;
Tensor _tmp_b;
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
class ITensor;
+class NECol2ImKernel;
+class NEIm2ColKernel;
+class NEWeightsReshapeKernel;
/** Function to reshape the weights. This function calls the following kernel:
* -# @ref NEWeightsReshapeKernel
NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete;
/** Default move assignment operator */
NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default;
+ /** Default destructor */
+ ~NEConvolutionLayerReshapeWeights();
/** Set the input and output tensors.
*
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
void run() override;
private:
- NEWeightsReshapeKernel _weights_reshape_kernel;
+ std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel;
};
namespace weights_transformations
class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights
{
public:
+ /** Constructor */
+ NEConvolutionLayerReshapeWeightsTransform() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionLayerReshapeWeightsTransform(const NEConvolutionLayerReshapeWeightsTransform &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionLayerReshapeWeightsTransform &operator=(const NEConvolutionLayerReshapeWeightsTransform &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolutionLayerReshapeWeightsTransform(NEConvolutionLayerReshapeWeightsTransform &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEConvolutionLayerReshapeWeightsTransform &operator=(NEConvolutionLayerReshapeWeightsTransform &&) = delete;
+ /** Default destructor */
+ ~NEConvolutionLayerReshapeWeightsTransform() = default;
void configure(const ITensor *input, const ITensor *biases)
{
_bias_bit = (biases != nullptr) ? 1 : 0;
NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete;
/** Default move assignment operator */
NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~NEGEMMConvolutionLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
IWeightsManager *_weights_manager;
NEConvolutionLayerReshapeWeights _reshape_weights;
weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
- NEIm2ColKernel _im2col_kernel;
+ std::unique_ptr<NEIm2ColKernel> _im2col_kernel;
NEGEMM _mm_gemm;
NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- NECol2ImKernel _col2im_kernel;
+ std::unique_ptr<NECol2ImKernel> _col2im_kernel;
NEReshapeLayer _reshape_layer;
const ITensor *_original_weights;
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H
#define ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
{
// Forward declarations
class ITensor;
+class NEGEMMInterleave4x4Kernel;
+class NEGEMMTranspose1xWKernel;
+class NEGEMMLowpMatrixMultiplyKernel;
/** Basic function to execute matrix multiply assembly kernels. */
class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction
public:
/** Constructor */
NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Destructor */
+ ~NEGEMMLowpAssemblyMatrixMultiplyCore();
+
/** Initialise the kernel's inputs, output
*
* @param[in] a First input tensor (Matrix A). Data type supported: U8, S8.
void run() override;
private:
- MemoryGroup _memory_group;
- NEGEMMAssemblyDispatch _asm_glue;
- std::unique_ptr<INEKernel> _mm_kernel;
- std::unique_ptr<INEKernel> _mtx_a_reshape_kernel;
- std::unique_ptr<INEKernel> _mtx_b_reshape_kernel;
- Tensor _tmp_a;
- Tensor _tmp_b;
+ MemoryGroup _memory_group;
+ NEGEMMAssemblyDispatch _asm_glue;
+ std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel;
+ std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel;
+ std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel;
+ Tensor _tmp_a;
+ Tensor _tmp_b;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H */
#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
#include "NEActivationLayer.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
class ITensor;
+class NEConvertQuantizedSignednessKernel;
+class NEGEMMInterleave4x4Kernel;
+class NEGEMMLowpMatrixMultiplyKernel;
+class NEGEMMLowpOffsetContributionKernel;
+class NEGEMMLowpOffsetContributionOutputStageKernel;
+class NEGEMMLowpMatrixAReductionKernel;
+class NEGEMMLowpMatrixBReductionKernel;
+class NEGEMMTranspose1xWKernel;
/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available:
*
NEGEMMLowpMatrixMultiplyCore &operator=(const NEGEMMLowpMatrixMultiplyCore &) = delete;
/** Default move assignment operator */
NEGEMMLowpMatrixMultiplyCore &operator=(NEGEMMLowpMatrixMultiplyCore &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpMatrixMultiplyCore();
/** Initialise the kernel's inputs, output
*
* @note GEMM_LOWP: low precision GEMM kernel
void prepare() override;
private:
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- NEGEMMAssemblyDispatch _asm_glue;
- NEGEMMLowpMatrixMultiplyKernel _mm_kernel;
- NEGEMMInterleave4x4Kernel _mtx_a_reshape_kernel;
- NEGEMMTranspose1xWKernel _mtx_b_reshape_kernel;
- NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
- NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
- NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
- NEActivationLayer _activation_func;
- NEConvertQuantizedSignednessKernel _convert_to_signed_asymm;
- NEConvertQuantizedSignednessKernel _convert_from_signed_asymm;
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ NEGEMMAssemblyDispatch _asm_glue;
+ std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel;
+ std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel;
+ std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
+ std::unique_ptr<NEGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
+ std::unique_ptr<NEGEMMLowpOffsetContributionKernel> _offset_contribution_kernel;
+ std::unique_ptr<NEGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
+ NEActivationLayer _activation_func;
+ std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_to_signed_asymm;
+ std::unique_ptr<NEConvertQuantizedSignednessKernel> _convert_from_signed_asymm;
Tensor _vector_sum_col;
Tensor _vector_sum_row;
#ifndef ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H
#define ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
/** This file contains all available output stages for GEMMLowp on NEON.
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON.
*
class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete;
+ /** Default destructor */
+ ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint();
/** Initialise the kernel's inputs, output
*
* @param[in] input Input tensor. Data type supported: S32
class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete;
+ /** Default destructor */
+ ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint();
/** Initialise the kernel's inputs, output
*
* @param[in] input Input tensor. Data type supported: S32
class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete;
+ /** Default destructor */
+ ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint();
/** Initialise the kernel's inputs, output
*
* @param[in] input Input tensor. Data type supported: S32
class NEGEMMLowpOutputStage : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEGEMMLowpOutputStage() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpOutputStage(const NEGEMMLowpOutputStage &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpOutputStage &operator=(const NEGEMMLowpOutputStage &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpOutputStage(NEGEMMLowpOutputStage &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMLowpOutputStage &operator=(NEGEMMLowpOutputStage &&) = delete;
+ /** Default destructor */
+ ~NEGEMMLowpOutputStage();
/** Initialise the kernel's inputs, output
*
* @param[in] input Input tensor. Data type supported: S32
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H
#define ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels:
*
class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEGEMMTranspose1xW() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMTranspose1xW(const NEGEMMTranspose1xW &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMTranspose1xW &operator=(const NEGEMMTranspose1xW &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMTranspose1xW(NEGEMMTranspose1xW &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEGEMMTranspose1xW &operator=(NEGEMMTranspose1xW &&) = delete;
+ /** Default destructor */
+ ~NEGEMMTranspose1xW() = default;
/** Initialise the kernel's inputs, output
*
* @param[in] input First input tensor. Data type supported: All
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEGatherKernel */
class NEGather : public INESimpleFunctionNoBorder
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEGAUSSIAN5x5_H
#define ARM_COMPUTE_NEGAUSSIAN5x5_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
class ITensor;
+class NEGaussian5x5HorKernel;
+class NEGaussian5x5VertKernel;
+class NEFillBorderKernel;
/** Basic function to execute gaussian filter 5x5. This function calls the following NEON kernels:
*
/** Default constructor
*/
NEGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussian5x5(const NEGaussian5x5 &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussian5x5 &operator=(const NEGaussian5x5 &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGaussian5x5(NEGaussian5x5 &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGaussian5x5 &operator=(NEGaussian5x5 &&) = default;
+ /** Default destructor */
+ ~NEGaussian5x5();
/** Initialise the function's input, output and border mode.
*
* @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function memory group */
- NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */
- NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */
- Tensor _tmp; /**< temporary buffer for output of horizontal pass */
- NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */
+ MemoryGroup _memory_group; /**< Function memory group */
+ std::unique_ptr<NEGaussian5x5HorKernel> _kernel_hor; /**< kernel for horizontal pass */
+ std::unique_ptr<NEGaussian5x5VertKernel> _kernel_vert; /**< kernel for vertical pass */
+ Tensor _tmp; /**< temporary buffer for output of horizontal pass */
+ std::unique_ptr<NEFillBorderKernel> _border_handler; /**< kernel to handle tensor borders */
};
}
#endif /*ARM_COMPUTE_NEGAUSSIAN5x5_H */
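A short usage sketch for the 5x5 Gaussian filter above, assuming the documented configure(input, output, border_mode, constant_border_value) overload on U8 images.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
#include "arm_compute/runtime/Tensor.h"

void gaussian5x5_example()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

    NEGaussian5x5 gauss;
    gauss.configure(&src, &dst, BorderMode::CONSTANT, 0); // constant border value of 0

    src.allocator()->allocate();
    dst.allocator()->allocate();

    gauss.run();
}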
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#define ARM_COMPUTE_NEGAUSSIANPYRAMID_H
#include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
namespace arm_compute
{
class ITensor;
+class NEGaussianPyramidHorKernel;
+class NEGaussianPyramidVertKernel;
+class NEFillBorderKernel;
/** Common interface for all Gaussian pyramid functions */
class NEGaussianPyramid : public IFunction
public:
/** Constructor */
NEGaussianPyramidHalf();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussianPyramidHalf(const NEGaussianPyramidHalf &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussianPyramidHalf &operator=(const NEGaussianPyramidHalf &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGaussianPyramidHalf(NEGaussianPyramidHalf &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGaussianPyramidHalf &operator=(NEGaussianPyramidHalf &&) = default;
+ /** Default destructor */
+ ~NEGaussianPyramidHalf();
// Inherited methods overridden:
void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
void run() override;
private:
- std::vector<NEFillBorderKernel> _horizontal_border_handler;
- std::vector<NEFillBorderKernel> _vertical_border_handler;
- std::vector<NEGaussianPyramidHorKernel> _horizontal_reduction;
- std::vector<NEGaussianPyramidVertKernel> _vertical_reduction;
+ std::vector<std::unique_ptr<NEFillBorderKernel>> _horizontal_border_handler;
+ std::vector<std::unique_ptr<NEFillBorderKernel>> _vertical_border_handler;
+ std::vector<std::unique_ptr<NEGaussianPyramidHorKernel>> _horizontal_reduction;
+ std::vector<std::unique_ptr<NEGaussianPyramidVertKernel>> _vertical_reduction;
};
/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following NEON kernels and functions:
public:
/** Constructor */
NEGaussianPyramidOrb();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussianPyramidOrb(const NEGaussianPyramidOrb &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussianPyramidOrb &operator=(const NEGaussianPyramidOrb &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGaussianPyramidOrb(NEGaussianPyramidOrb &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGaussianPyramidOrb &operator=(NEGaussianPyramidOrb &&) = default;
+ /** Default destructor */
+ ~NEGaussianPyramidOrb();
// Inherited methods overridden:
void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H
#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H
-#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
-#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CPP/CPPScheduler.h"
#include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h"
+#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPadLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
NEGenerateProposalsLayer(const NEGenerateProposalsLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEGenerateProposalsLayer &operator=(const NEGenerateProposalsLayer &) = delete;
+ /** Default destructor */
+ ~NEGenerateProposalsLayer();
/** Set the input and output tensors.
*
MemoryGroup _memory_group;
// Neon kernels
- NEPermuteKernel _permute_deltas_kernel;
- NEReshapeLayer _flatten_deltas;
- NEPermuteKernel _permute_scores_kernel;
- NEReshapeLayer _flatten_scores;
- NEComputeAllAnchorsKernel _compute_anchors_kernel;
- NEBoundingBoxTransformKernel _bounding_box_kernel;
- NEPadLayerKernel _pad_kernel;
- NEDequantizationLayerKernel _dequantize_anchors;
- NEDequantizationLayerKernel _dequantize_deltas;
- NEQuantizationLayerKernel _quantize_all_proposals;
+ NEPermute _permute_deltas;
+ NEReshapeLayer _flatten_deltas;
+ NEPermute _permute_scores;
+ NEReshapeLayer _flatten_scores;
+ NEComputeAllAnchors _compute_anchors;
+ NEBoundingBoxTransform _bounding_box;
+ NEPadLayer _pad;
+ NEDequantizationLayer _dequantize_anchors;
+ NEDequantizationLayer _dequantize_deltas;
+ NEQuantizationLayer _quantize_all_proposals;
// CPP functions
CPPBoxWithNonMaximaSuppressionLimit _cpp_nms;
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEHOGDESCRIPTOR_H
#define ARM_COMPUTE_NEHOGDESCRIPTOR_H
-#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
class IHOG;
+class NEHOGOrientationBinningKernel;
+class NEHOGBlockNormalizationKernel;
+
/** Basic function to calculate HOG descriptor. This function calls the following NEON kernels:
*
* -# @ref NEHOGGradient
public:
/** Default constructor */
NEHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGDescriptor(const NEHOGDescriptor &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGDescriptor &operator=(const NEHOGDescriptor &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHOGDescriptor(NEHOGDescriptor &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHOGDescriptor &operator=(NEHOGDescriptor &&) = delete;
+ /** Default destructor */
+ ~NEHOGDescriptor();
/** Initialise the function's source, destination, HOG data-object and border mode
*
* @param[in, out] input Input tensor. Data type supported: U8
void run() override;
private:
- MemoryGroup _memory_group;
- NEHOGGradient _gradient;
- NEHOGOrientationBinningKernel _orient_bin;
- NEHOGBlockNormalizationKernel _block_norm;
- Tensor _mag;
- Tensor _phase;
- Tensor _hog_space;
+ MemoryGroup _memory_group;
+ NEHOGGradient _gradient;
+ std::unique_ptr<NEHOGOrientationBinningKernel> _orient_bin;
+ std::unique_ptr<NEHOGBlockNormalizationKernel> _block_norm;
+ Tensor _mag;
+ Tensor _phase;
+ Tensor _hog_space;
};
}
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEHOGDETECTOR_H
#define ARM_COMPUTE_NEHOGDETECTOR_H
+#include "arm_compute/core/IArray.h"
#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
+class ITensor;
+class ITensorInfo;
/** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel:
*
* -# @ref NEHOGDetectorKernel
class NEHOGDetector : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEHOGDetector() = default;
+ /** Prevent instances of this class from being copied */
+ NEHOGDetector(const NEHOGDetector &) = delete;
+ /** Default move constructor */
+ NEHOGDetector(NEHOGDetector &&) = default;
+ /** Prevent instances of this class from being copied */
+ NEHOGDetector &operator=(const NEHOGDetector &) = delete;
+ /** Default move assignment operator */
+ NEHOGDetector &operator=(NEHOGDetector &&) = default;
+ /** Destructor */
+ ~NEHOGDetector();
/** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
*
     * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is the caller's responsibility to clear it.
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEHOGGRADIENT_H
#define ARM_COMPUTE_NEHOGGRADIENT_H
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
class ITensor;
+class ICPPKernel;
+
/** Basic function to calculate the gradient for HOG. This function calls the following NEON kernels:
*
* -# @ref NEDerivative
public:
/** Default constructor */
NEHOGGradient(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGGradient(const NEHOGGradient &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGGradient &operator=(const NEHOGGradient &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHOGGradient(NEHOGGradient &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHOGGradient &operator=(NEHOGGradient &&) = delete;
+ /** Default destructor */
+ ~NEHOGGradient();
/** Initialise the function's source, destinations, phase type and border mode
*
* @param[in, out] input Input tensor. Data type supported: U8.
void run() override;
private:
- MemoryGroup _memory_group;
- NEDerivative _derivative;
- std::unique_ptr<INEKernel> _mag_phase;
- Tensor _gx;
- Tensor _gy;
+ MemoryGroup _memory_group;
+ NEDerivative _derivative;
+ std::unique_ptr<ICPPKernel> _mag_phase;
+ Tensor _gx;
+ Tensor _gy;
};
}
#endif /*ARM_COMPUTE_NEHOGGRADIENT_H */
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
#include "arm_compute/core/IArray.h"
#include "arm_compute/core/IMultiHOG.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
+class NEHOGOrientationBinningKernel;
+class NEHOGBlockNormalizationKernel;
+
/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels:
*
* -# @ref NEHOGGradient
NEHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEHOGMultiDetection(const NEHOGMultiDetection &) = delete;
+ /** Default move constructor */
+ NEHOGMultiDetection(NEHOGMultiDetection &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete;
+ /** Default move assignment operator */
+ NEHOGMultiDetection &operator=(NEHOGMultiDetection &&) = default;
+ /** Default destructor */
+ ~NEHOGMultiDetection();
/** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
*
* @param[in, out] input Input tensor. Data type supported: U8
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
class ITensor;
+class NEFillBorderKernel;
+class INEHarrisScoreKernel;
using IImage = ITensor;
/** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions:
* @param[in] memory_manager (Optional) Memory manager.
*/
NEHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHarrisCorners(const NEHarrisCorners &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHarrisCorners &operator=(const NEHarrisCorners &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHarrisCorners(NEHarrisCorners &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHarrisCorners &operator=(NEHarrisCorners &&) = delete;
+ /** Default destructor */
+ ~NEHarrisCorners();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
CPPCornerCandidatesKernel _candidates; /**< Sort kernel */
CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */
- NEFillBorderKernel _border_gx; /**< Border handler before running harris score */
- NEFillBorderKernel _border_gy; /**< Border handler before running harris score */
+ std::unique_ptr<NEFillBorderKernel> _border_gx; /**< Border handler before running harris score */
+ std::unique_ptr<NEFillBorderKernel> _border_gy; /**< Border handler before running harris score */
Image _gx; /**< Source image - Gx component */
Image _gy; /**< Source image - Gy component */
Image _score; /**< Source image - Harris score */
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEHISTOGRAM_H
#define ARM_COMPUTE_NEHISTOGRAM_H
-#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include <cstddef>
#include <cstdint>
#include <memory>
+#include <vector>
namespace arm_compute
{
+class ITensor;
class IDistribution1D;
+class NEHistogramKernel;
+using IImage = ITensor;
/** Basic function to run @ref NEHistogramKernel. */
class NEHistogram : public IFunction
public:
/** Default Constructor. */
NEHistogram();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHistogram(const NEHistogram &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHistogram &operator=(const NEHistogram &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHistogram(NEHistogram &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHistogram &operator=(NEHistogram &&) = delete;
+ /** Default destructor */
+ ~NEHistogram();
/** Initialise the kernel's inputs.
*
* @param[in] input Input image. Data type supported: U8.
void run() override;
private:
- NEHistogramKernel _histogram_kernel;
- std::vector<uint32_t> _local_hist;
- std::vector<uint32_t> _window_lut;
- size_t _local_hist_size;
+ std::unique_ptr<NEHistogramKernel> _histogram_kernel;
+ std::vector<uint32_t> _local_hist;
+ std::vector<uint32_t> _window_lut;
+ size_t _local_hist_size;
/** 256 possible pixel values as we handle only U8 images */
static constexpr unsigned int window_lut_default_size = 256;
};
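Because NEHistogramKernel is only forward declared in this header, the std::unique_ptr member forces the destructor to be declared here and defined in the .cpp, where the kernel type is complete. A minimal sketch of that idiom follows; the Widget/Impl names are illustrative placeholders, not the library's own classes:

// Sketch only: the incomplete-type / unique_ptr idiom, not arm_compute source.
// ---- Widget.h ----
#include <memory>
class Impl;                          // forward declaration; Impl is incomplete here
class Widget
{
public:
    Widget();
    ~Widget();                       // declared here, defined where Impl is complete
private:
    std::unique_ptr<Impl> _impl;     // deleting an incomplete type is ill-formed, hence the out-of-line destructor
};
// ---- Widget.cpp ----
class Impl
{
};
Widget::Widget() : _impl(std::make_unique<Impl>()) {}
Widget::~Widget() = default;         // Impl is complete here, so unique_ptr's deleter instantiates correctly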
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
+class NEIm2ColKernel;
/** Basic function to run @ref NEIm2ColKernel */
class NEIm2Col : public IFunction
public:
/** Default constructor */
NEIm2Col();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIm2Col(const NEIm2Col &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIm2Col &operator=(const NEIm2Col &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEIm2Col(NEIm2Col &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEIm2Col &operator=(NEIm2Col &&) = delete;
+ /** Default destructor */
+ ~NEIm2Col();
/** Configure the im2col NEON kernel
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
void run() override;
private:
- NEIm2ColKernel _kernel;
- unsigned int _y_dim;
+ std::unique_ptr<NEIm2ColKernel> _kernel;
+ unsigned int _y_dim;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEIM2COL_H */
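The deleted copy constructor and copy assignment mirror what the std::unique_ptr member already implies; writing them out keeps the restriction visible in the public interface. A small sketch of the behaviour being documented, with placeholder type names:

// Sketch only: what the explicit '= delete' documents; placeholder names, not arm_compute code.
#include <memory>

struct KernelLike
{
};

struct FunctionLike
{
    FunctionLike() = default;
    // The unique_ptr member already makes copies implicitly deleted;
    // spelling it out keeps the restriction obvious in the header.
    FunctionLike(const FunctionLike &) = delete;
    FunctionLike &operator=(const FunctionLike &) = delete;

    std::unique_ptr<KernelLike> _kernel;
};

int main()
{
    FunctionLike a;
    a._kernel = std::make_unique<KernelLike>();
    // FunctionLike b = a;           // would not compile: the copy constructor is deleted
    return 0;
}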
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H
#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H
-#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
class ITensor;
+class NEInstanceNormalizationLayerKernel;
/** Basic function to perform an instance normalization.
*
public:
/** Constructor */
NEInstanceNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEInstanceNormalizationLayer(const NEInstanceNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEInstanceNormalizationLayer &operator=(const NEInstanceNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEInstanceNormalizationLayer(NEInstanceNormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEInstanceNormalizationLayer &operator=(NEInstanceNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~NEInstanceNormalizationLayer();
/** Set the input and output tensors.
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
void run() override;
private:
- MemoryGroup _memory_group;
- NEInstanceNormalizationLayerKernel _normalization_kernel;
- bool _is_nchw;
- NEPermute _permute_input;
- NEPermute _permute_output;
- Tensor _permuted_input;
- Tensor _permuted_output;
+ MemoryGroup _memory_group;
+ std::unique_ptr<NEInstanceNormalizationLayerKernel> _normalization_kernel;
+ bool _is_nchw;
+ NEPermute _permute_input;
+ NEPermute _permute_output;
+ Tensor _permuted_input;
+ Tensor _permuted_output;
};
}
#endif /* ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H */
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
class NEIntegralImage : public INESimpleFunction
{
public:
+ /** Constructor */
+ NEIntegralImage() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIntegralImage(const NEIntegralImage &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIntegralImage &operator=(const NEIntegralImage &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEIntegralImage(NEIntegralImage &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEIntegralImage &operator=(NEIntegralImage &&) = delete;
+ /** Default destructor */
+ ~NEIntegralImage();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data type supported: U8.
#ifndef ARM_COMPUTE_NEL2NORMALIZELAYER_H
#define ARM_COMPUTE_NEL2NORMALIZELAYER_H
-#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
class ITensor;
+class NEL2NormalizeLayerKernel;
/** Basic function to perform an L2 normalization on a given axis.
*
public:
/** Constructor */
NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEL2NormalizeLayer(const NEL2NormalizeLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEL2NormalizeLayer &operator=(const NEL2NormalizeLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEL2NormalizeLayer(NEL2NormalizeLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEL2NormalizeLayer &operator=(NEL2NormalizeLayer &&) = delete;
+ /** Default destructor */
+ ~NEL2NormalizeLayer();
/** Set the input and output tensors.
*
* @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0)
void run() override;
private:
- MemoryGroup _memory_group;
- NEReductionOperation _reduce_func;
- NEL2NormalizeLayerKernel _normalize_kernel;
- Tensor _sumsq;
+ MemoryGroup _memory_group;
+ NEReductionOperation _reduce_func;
+ std::unique_ptr<NEL2NormalizeLayerKernel> _normalize_kernel;
+ Tensor _sumsq;
};
}
#endif /* ARM_COMPUTE_NEL2NORMALIZELAYER_H */
#ifndef ARM_COMPUTE_NELSTMLAYER_H
#define ARM_COMPUTE_NELSTMLAYER_H
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+#include "arm_compute/runtime/NEON/functions/NETranspose.h"
#include "arm_compute/runtime/common/LSTMParams.h"
namespace arm_compute
public:
/** Default constructor */
NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELSTMLayer(const NELSTMLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELSTMLayer &operator=(const NELSTMLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELSTMLayer(NELSTMLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELSTMLayer &operator=(NELSTMLayer &&) = delete;
+ /** Default destructor */
+ ~NELSTMLayer();
/** Initialize function's tensors.
*
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
NEActivationLayer _activation_forget_gate;
NEFullyConnectedLayer _fully_connected_cell_state;
NEGEMM _gemm_cell_state1;
- NETransposeKernel _transpose_cell_state;
+ NETranspose _transpose_cell_state;
NEArithmeticAddition _accum_cell_state1;
NEArithmeticAddition _accum_cell_state2;
NEPixelWiseMultiplication _pixelwise_mul_cell_state1;
NEPixelWiseMultiplication _pixelwise_mul_output_state2;
NEFullyConnectedLayer _fully_connected_output_state;
NEActivationLayer _projection_clip;
- NECopyKernel _copy_cell_state;
- NECopyKernel _copy_output;
+ NECopy _copy_cell_state;
+ NECopy _copy_output;
NEConcatenateLayer _concat_scratch_buffer;
NEConcatenateLayer _concat_inputs_forget_gate;
NEConcatenateLayer _concat_weights_forget_gate;
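Swapping the NECopyKernel and NETransposeKernel members for the NECopy and NETranspose functions moves kernel scheduling out of NELSTMLayer: the layer now simply calls run() on the wrapped functions. The following is a rough, self-contained sketch of that wrapper pattern, not the library's actual implementation:

// Sketch only: a function-level wrapper that hides per-kernel scheduling from its callers.
#include <memory>

struct Kernel
{
    void run_window()
    {
        // process the window assigned by the scheduler
    }
};

struct CopyFunction                  // models an NECopy-style wrapper; names are illustrative
{
    void configure()
    {
        _kernel = std::make_unique<Kernel>();
    }
    void run()
    {
        _kernel->run_window();       // scheduling detail lives here, not in the calling layer
    }

private:
    std::unique_ptr<Kernel> _kernel;
};

int main()
{
    CopyFunction copy_cell_state;    // stands in for the _copy_cell_state member
    copy_cell_state.configure();     // done once, at configure time
    copy_cell_state.run();           // the LSTM just runs the wrapped function
    return 0;
}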
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete;
/** Default move assignment operator */
NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = default;
+ /** Default destructor */
+ ~NELSTMLayerQuantized();
/** Initialize function's tensors.
*
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
public:
/** Constructor */
NELaplacianPyramid();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELaplacianPyramid(const NELaplacianPyramid &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELaplacianPyramid &operator=(const NELaplacianPyramid &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELaplacianPyramid(NELaplacianPyramid &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELaplacianPyramid &operator=(NELaplacianPyramid &&) = delete;
+ /** Default destructor */
+ ~NELaplacianPyramid();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data type supported: U8.
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
public:
/** Constructor */
NELaplacianReconstruct();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELaplacianReconstruct(const NELaplacianReconstruct &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELaplacianReconstruct &operator=(const NELaplacianReconstruct &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELaplacianReconstruct(NELaplacianReconstruct &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NELaplacianReconstruct &operator=(NELaplacianReconstruct &&) = delete;
+ /** Default destructor */
+ ~NELaplacianReconstruct();
/** Initialise the function's source, destinations and border mode.
*
* The Output image must have the same size as the first level of the pyramid.
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
-#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NECol2Im.h"
+#include "arm_compute/runtime/NEON/functions/NEIm2Col.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace arm_compute
{
class ITensor;
+class NEWeightsReshapeKernel;
+class NELocallyConnectedMatrixMultiplyKernel;
/** Basic function to compute the locally connected layer. This function calls the following NEON kernels:
*
NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete;
/** Default move assignment operator */
NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = default;
+ /** Default destructor */
+ ~NELocallyConnectedLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEIm2ColKernel _input_im2col_kernel;
- NEWeightsReshapeKernel _weights_reshape_kernel;
- NELocallyConnectedMatrixMultiplyKernel _mm_kernel;
- NECol2ImKernel _output_col2im_kernel;
- Tensor _input_im2col_reshaped;
- Tensor _weights_reshaped;
- Tensor _gemm_output;
- bool _is_prepared;
- const ITensor *_original_weights;
+ MemoryGroup _memory_group;
+ NEIm2Col _input_im2col;
+ std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel;
+ std::unique_ptr<NELocallyConnectedMatrixMultiplyKernel> _mm_kernel;
+ NECol2Im _output_col2im;
+ Tensor _input_im2col_reshaped;
+ Tensor _weights_reshaped;
+ Tensor _gemm_output;
+ bool _is_prepared;
+ const ITensor *_original_weights;
};
}
#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H */
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEMAGNITUDE_H
#define ARM_COMPUTE_NEMAGNITUDE_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
class NEMagnitude : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEMagnitude() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMagnitude(const NEMagnitude &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMagnitude &operator=(const NEMagnitude &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMagnitude(NEMagnitude &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMagnitude &operator=(NEMagnitude &&) = delete;
+ /** Default destructor */
+ ~NEMagnitude();
/** Initialise the kernel's inputs.
*
* @param[in] input1 First tensor input. Data type supported: S16.
#ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H
#define ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NEMemsetKernel;
+class NEMaxUnpoolingLayerKernel;
/** Function to perform MaxUnpooling. This function calls the following NEON kernels:
*
public:
/** Constructor */
NEMaxUnpoolingLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMaxUnpoolingLayer(const NEMaxUnpoolingLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMaxUnpoolingLayer &operator=(const NEMaxUnpoolingLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMaxUnpoolingLayer(NEMaxUnpoolingLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMaxUnpoolingLayer &operator=(NEMaxUnpoolingLayer &&) = delete;
+ /** Default destructor */
+ ~NEMaxUnpoolingLayer();
/** Set the input and output tensors.
*
* @note Only pool size 2 is supported
void run() override;
private:
- NEMemsetKernel _memset_kernel;
- NEMaxUnpoolingLayerKernel _unpooling_layer_kernel;
+ std::unique_ptr<NEMemsetKernel> _memset_kernel;
+ std::unique_ptr<NEMaxUnpoolingLayerKernel> _unpooling_layer_kernel;
};
}
#endif /* ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H */
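With both kernels held behind std::unique_ptr, the corresponding source file would typically create them at configure() time and dispatch them in order from run(). The snippet below is only an assumed sketch of that split, using stand-in kernel types rather than the real NEMaxUnpoolingLayer.cpp:

// Sketch only: assumed configure()/run() split behind unique_ptr members.
#include <memory>

struct MemsetKernel
{
    void run()
    {
        // zero-initialise the output tensor
    }
};

struct UnpoolKernel
{
    void run()
    {
        // scatter the pooled values back to their recorded max locations
    }
};

class MaxUnpoolingLike
{
public:
    void configure()
    {
        // Kernels are created where their definitions are visible,
        // so the header only needs forward declarations.
        _memset = std::make_unique<MemsetKernel>();
        _unpool = std::make_unique<UnpoolKernel>();
    }
    void run()
    {
        _memset->run();              // output is cleared first
        _unpool->run();              // then the unpooling kernel writes the values
    }

private:
    std::unique_ptr<MemsetKernel> _memset;
    std::unique_ptr<UnpoolKernel> _unpool;
};

int main()
{
    MaxUnpoolingLike f;
    f.configure();
    f.run();
    return 0;
}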
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEMEANSTDDEV_H
#define ARM_COMPUTE_NEMEANSTDDEV_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h"
+#include "arm_compute/core/IMultiImage.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
#include <cstdint>
namespace arm_compute
{
+class NEMeanStdDevKernel;
+class NEFillBorderKernel;
+
/** Basic function to compute the mean and standard deviation. This function calls the following NEON kernels:
*
* @ref NEMeanStdDevKernel
public:
/** Default Constructor. */
NEMeanStdDev();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDev(const NEMeanStdDev &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDev &operator=(const NEMeanStdDev &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMeanStdDev(NEMeanStdDev &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMeanStdDev &operator=(NEMeanStdDev &&) = delete;
+ /** Default destructor */
+ ~NEMeanStdDev();
/** Initialise the kernel's inputs and outputs.
*
* @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling)
void run() override;
private:
- NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */
- NEFillBorderKernel _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */
- uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
- uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
+ std::unique_ptr<NEMeanStdDevKernel> _mean_stddev_kernel; /**< Kernel that performs the mean and standard deviation calculation. */
+ std::unique_ptr<NEFillBorderKernel> _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */
+ uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
+ uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
};
}
#endif /*ARM_COMPUTE_NEMEANSTDDEV_H */
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to execute mean and standard deviation normalization by calling @ref NEMeanStdDevNormalizationKernel */
class NEMeanStdDevNormalizationLayer : public INESimpleFunctionNoBorder
{
public:
+ /** Constructor */
+ NEMeanStdDevNormalizationLayer() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevNormalizationLayer(const NEMeanStdDevNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevNormalizationLayer &operator=(const NEMeanStdDevNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMeanStdDevNormalizationLayer(NEMeanStdDevNormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMeanStdDevNormalizationLayer &operator=(NEMeanStdDevNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~NEMeanStdDevNormalizationLayer();
/** Initialise the function's input and outputs.
*
* @note If the output tensor is a nullptr, the normalization will be performed in-place.
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#define ARM_COMPUTE_NEMINMAXLOCATION_H
#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/IFunction.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
class ITensor;
+class NEMinMaxKernel;
+class NEMinMaxLocationKernel;
using IImage = ITensor;
/** Basic function to execute min and max location. This function calls the following NEON kernels:
public:
/** Constructor */
NEMinMaxLocation();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxLocation(const NEMinMaxLocation &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxLocation &operator=(const NEMinMaxLocation &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMinMaxLocation(NEMinMaxLocation &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMinMaxLocation &operator=(NEMinMaxLocation &&) = delete;
+ /** Default destructor */
+ ~NEMinMaxLocation();
/** Initialise the kernel's inputs and outputs.
*
* @param[in] input Input image. Data types supported: U8/S16/F32.
void run() override;
private:
- NEMinMaxKernel _min_max; /**< Kernel that performs min/max */
- NEMinMaxLocationKernel _min_max_loc; /**< Kernel that extracts min/max locations */
+ std::unique_ptr<NEMinMaxKernel> _min_max; /**< Kernel that performs min/max */
+ std::unique_ptr<NEMinMaxLocationKernel> _min_max_loc; /**< Kernel that extracts min/max locations */
};
}
#endif /*ARM_COMPUTE_NEMINMAXLOCATION_H */
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
class ITensor;
+class NENormalizationLayerKernel;
/** Basic function to compute a normalization layer. This function calls the following NEON kernels:
*
public:
/** Default constructor */
NENormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENormalizationLayer(const NENormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENormalizationLayer &operator=(const NENormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NENormalizationLayer(NENormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NENormalizationLayer &operator=(NENormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~NENormalizationLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
void run() override;
private:
- MemoryGroup _memory_group; /**< Function memory group */
- NENormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel */
- NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */
- Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */
+ MemoryGroup _memory_group; /**< Function memory group */
+ std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */
+ NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */
+ Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */
};
}
#endif /* ARM_COMPUTE_NENORMALIZATIONLAYER_H */
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#define ARM_COMPUTE_NEOPTICALFLOW_H
#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
class Pyramid;
+class NELKTrackerKernel;
/** Array of LK Internal Keypoints */
using LKInternalKeypointArray = Array<NELKInternalKeypoint>;
NEOpticalFlow(const NEOpticalFlow &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEOpticalFlow &operator=(const NEOpticalFlow &) = delete;
+ /** Default destructor */
+ ~NEOpticalFlow();
/** Initialise the function input and output
*
* @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported U8
void run() override;
private:
- MemoryGroup _memory_group;
- std::vector<NEScharr3x3> _func_scharr;
- std::vector<NELKTrackerKernel> _kernel_tracker;
- std::vector<Tensor> _scharr_gx;
- std::vector<Tensor> _scharr_gy;
- IKeyPointArray *_new_points;
- const IKeyPointArray *_new_points_estimates;
- const IKeyPointArray *_old_points;
- LKInternalKeypointArray _new_points_internal;
- LKInternalKeypointArray _old_points_internal;
- unsigned int _num_levels;
+ MemoryGroup _memory_group;
+ std::vector<NEScharr3x3> _func_scharr;
+ std::vector<std::unique_ptr<NELKTrackerKernel>> _kernel_tracker;
+ std::vector<Tensor> _scharr_gx;
+ std::vector<Tensor> _scharr_gy;
+ IKeyPointArray *_new_points;
+ const IKeyPointArray *_new_points_estimates;
+ const IKeyPointArray *_old_points;
+ LKInternalKeypointArray _new_points_internal;
+ LKInternalKeypointArray _old_points_internal;
+ unsigned int _num_levels;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEOPTICALFLOW_H */
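Since the NELKTrackerKernel header is no longer included, the kernel is an incomplete type at this point, so the per-level trackers are stored as std::vector<std::unique_ptr<NELKTrackerKernel>> rather than by value. A short illustrative sketch of that per-level setup, with a placeholder kernel type:

// Sketch only: per-level kernels stored through unique_ptr; placeholder kernel type.
#include <memory>
#include <vector>

struct TrackerKernel
{
    void run()
    {
        // track keypoints at one pyramid level
    }
};

int main()
{
    const unsigned int num_levels = 4;                        // one tracker kernel per pyramid level
    std::vector<std::unique_ptr<TrackerKernel>> trackers;
    trackers.reserve(num_levels);
    for(unsigned int i = 0; i < num_levels; ++i)
    {
        trackers.emplace_back(std::make_unique<TrackerKernel>());
    }
    for(auto &kernel : trackers)
    {
        kernel->run();                                        // coarse-to-fine, one level at a time
    }
    return 0;
}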
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
namespace experimental
{
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include "arm_compute/runtime/SubTensor.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
namespace arm_compute
{
+class NECopyKernel;
+class NEPadLayerKernel;
+
/** Basic function to pad a tensor. This function calls the following NEON functions/kernels:
*
* - For padding mode = PaddingMode::CONSTANT:
class NEPadLayer : public IFunction
{
public:
- /** Default constructor*/
+ /** Default Constructor */
NEPadLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPadLayer(const NEPadLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPadLayer &operator=(const NEPadLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPadLayer(NEPadLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPadLayer &operator=(NEPadLayer &&) = delete;
+ /** Default destructor */
+ ~NEPadLayer();
/** Initialize the function
*
* @param[in] input Source tensor. Data types supported: All.
void configure_reflect_symmetric_mode(ITensor *input, ITensor *output);
private:
- NECopyKernel _copy_kernel;
- NEPadLayerKernel _pad_kernel;
- PaddingMode _mode;
- PaddingList _padding;
- uint32_t _num_dimensions;
- std::vector<NEStridedSlice> _slice_functions;
- std::vector<NEConcatenateLayer> _concat_functions;
- std::vector<Tensor> _slice_results;
- std::vector<Tensor> _concat_results;
+ std::unique_ptr<NECopyKernel> _copy_kernel;
+ std::unique_ptr<NEPadLayerKernel> _pad_kernel;
+ PaddingMode _mode;
+ PaddingList _padding;
+ uint32_t _num_dimensions;
+ std::vector<NEStridedSlice> _slice_functions;
+ std::vector<NEConcatenateLayer> _concat_functions;
+ std::vector<Tensor> _slice_results;
+ std::vector<Tensor> _concat_results;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEPADLAYER_H */
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEPermuteKernel */
class NEPermute : public INESimpleFunctionNoBorder
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEPHASE_H
#define ARM_COMPUTE_NEPHASE_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEMagnitudePhaseKernel */
class NEPhase : public INESimpleFunctionNoBorder
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
namespace experimental
{
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NEPoolingLayerKernel;
+class NEFillBorderKernel;
/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels:
*
public:
/** Constructor */
NEPoolingLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPoolingLayer(const NEPoolingLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPoolingLayer &operator=(const NEPoolingLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPoolingLayer(NEPoolingLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEPoolingLayer &operator=(NEPoolingLayer &&) = delete;
+ /** Default destructor */
+ ~NEPoolingLayer();
/** Set the input and output tensors.
*
* @note F16 is supported for pool sizes 2 and 3 only
void run() override;
private:
- NEPoolingLayerKernel _pooling_layer_kernel;
- NEFillBorderKernel _border_handler;
- bool _is_global_pooling_layer;
- DataLayout _data_layout;
+ std::unique_ptr<NEPoolingLayerKernel> _pooling_layer_kernel;
+ std::unique_ptr<NEFillBorderKernel> _border_handler;
+ bool _is_global_pooling_layer;
+ DataLayout _data_layout;
};
}
#endif /* ARM_COMPUTE_NEPOOLINGLAYER_H */
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NEPRIORBOXLAYER_H
#define ARM_COMPUTE_NEPRIORBOXLAYER_H
-#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEPriorBoxLayerKernel. */
class NEPriorBoxLayer : public INESimpleFunctionNoBorder
#ifndef ARM_COMPUTE_NEQLSTMLAYER_H
#define ARM_COMPUTE_NEQLSTMLAYER_H
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
+#include "support/MemorySupport.h"
#include "arm_compute/runtime/common/LSTMParams.h"
+#include <memory>
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
+class NEQLSTMLayerNormalizationKernel;
+class NEGEMMLowpMatrixAReductionKernel;
/** Basic function to run @ref NEQLSTMLayer
*
NEQLSTMLayer &operator=(const NEQLSTMLayer &) = delete;
/** Default move assignment operator */
NEQLSTMLayer &operator=(NEQLSTMLayer &&) = default;
+ /** Default destructor */
+ ~NEQLSTMLayer();
/** Initialize function's tensors.
*
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
Tensor *outstage_res, float gemmlowp_scale,
const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);
- MemoryGroup _memory_group{};
+ MemoryGroup _memory_group;
/** A small internal kernel to do the copy between two tensors */
class TensorCopyKernel
Window _window{};
public:
+ /** Destructor */
+ ~TensorCopyKernel();
/** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer::TensorCopyKernel
*
* @param[in] src Source tensor info.
};
// Functions used
- NETranspose _transpose_input_to_forget_weights{};
- NETranspose _transpose_input_to_cell_weights{};
- NETranspose _transpose_input_to_output_weights{};
- NETranspose _transpose_input_to_input_weights{};
- NETranspose _transpose_recurrent_to_forget_weights{};
- NETranspose _transpose_recurrent_to_cell_weights{};
- NETranspose _transpose_recurrent_to_output_weights{};
- NETranspose _transpose_recurrent_to_input_weights{};
- NETranspose _transpose_projection_weights{};
- NEGEMMLowpMatrixAReductionKernel _input_to_input_reduction{};
- NEGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{};
- NEGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{};
- NEGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{};
- NEGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{};
- NEGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{};
- NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
- NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
- NEGEMMLowpMatrixAReductionKernel _projection_reduction{};
- NEArithmeticAddition _projection_bias_add{};
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};
- NEGEMMLowpOutputStage _input_to_forget_outstage{};
- NEGEMMLowpOutputStage _recurrent_to_forget_outstage{};
- NEGEMMLowpOutputStage _cell_to_forget_outstage{};
- NEArithmeticAddition _accumulate_input_recurrent_forget{};
- NEArithmeticAddition _accumulate_cell_forget{};
- NEActivationLayer _forget_gate_sigmoid{};
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
- NEGEMMLowpOutputStage _input_to_cell_outstage{};
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
- NEGEMMLowpOutputStage _recurrent_to_cell_outstage{};
- NEArithmeticAddition _accumulate_input_recurrent_modulation{};
- NEActivationLayer _cell_gate_tanh{};
- NEArithmeticSubtraction _input_gate_sub{};
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
- NEGEMMLowpOutputStage _input_to_input_outstage{};
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
- NEGEMMLowpOutputStage _recurrent_to_input_outstage{};
- NEArithmeticAddition _accumulate_input_recurrent_input{};
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_input{};
- NEGEMMLowpOutputStage _cell_to_input_outstage{};
- NEArithmeticAddition _accumulate_cell_input{};
- NEActivationLayer _input_gate_sigmoid{};
- NEPixelWiseMultiplication _pixelwise_mul_forget_cell{};
- NEPixelWiseMultiplication _pixelwise_mul_input_cell{};
- NEArithmeticAddition _add_forget_cell{};
- NEActivationLayer _cell_clip{};
- NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
- NEGEMMLowpOutputStage _input_to_output_outstage{};
- NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
- NEGEMMLowpOutputStage _recurrent_to_output_outstage{};
- NEArithmeticAddition _accumulate_input_recurrent_output{};
- NEPixelWiseMultiplication _pixelwise_mul_cell_to_output{};
- NEGEMMLowpOutputStage _cell_to_output_outstage{};
- NEArithmeticAddition _accumulate_cell_to_output{};
- NEActivationLayer _output_gate_sigmoid{};
- NEActivationLayer _hidden_tanh{};
- NEPixelWiseMultiplication _pixelwise_mul_hidden{};
- NEGEMMLowpOutputStage _hidden_outstage{};
- NEGEMMLowpMatrixMultiplyCore _mm_projection{};
- NEGEMMLowpOutputStage _projection_outstage{};
- NEArithmeticAddition _accumulate_projection{};
- NEActivationLayer _projection_clip{};
+ NETranspose _transpose_input_to_forget_weights;
+ NETranspose _transpose_input_to_cell_weights;
+ NETranspose _transpose_input_to_output_weights;
+ NETranspose _transpose_input_to_input_weights;
+ NETranspose _transpose_recurrent_to_forget_weights;
+ NETranspose _transpose_recurrent_to_cell_weights;
+ NETranspose _transpose_recurrent_to_output_weights;
+ NETranspose _transpose_recurrent_to_input_weights;
+ NETranspose _transpose_projection_weights;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
+ std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _projection_reduction;
+ NEArithmeticAddition _projection_bias_add;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget;
+ NEGEMMLowpOutputStage _input_to_forget_outstage;
+ NEGEMMLowpOutputStage _recurrent_to_forget_outstage;
+ NEGEMMLowpOutputStage _cell_to_forget_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_forget;
+ NEArithmeticAddition _accumulate_cell_forget;
+ NEActivationLayer _forget_gate_sigmoid;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell;
+ NEGEMMLowpOutputStage _input_to_cell_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell;
+ NEGEMMLowpOutputStage _recurrent_to_cell_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_modulation;
+ NEActivationLayer _cell_gate_tanh;
+ NEArithmeticSubtraction _input_gate_sub;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_input;
+ NEGEMMLowpOutputStage _input_to_input_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input;
+ NEGEMMLowpOutputStage _recurrent_to_input_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_input;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_input;
+ NEGEMMLowpOutputStage _cell_to_input_outstage;
+ NEArithmeticAddition _accumulate_cell_input;
+ NEActivationLayer _input_gate_sigmoid;
+ NEPixelWiseMultiplication _pixelwise_mul_forget_cell;
+ NEPixelWiseMultiplication _pixelwise_mul_input_cell;
+ NEArithmeticAddition _add_forget_cell;
+ NEActivationLayer _cell_clip;
+ NEGEMMLowpMatrixMultiplyCore _mm_input_to_output;
+ NEGEMMLowpOutputStage _input_to_output_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output;
+ NEGEMMLowpOutputStage _recurrent_to_output_outstage;
+ NEArithmeticAddition _accumulate_input_recurrent_output;
+ NEPixelWiseMultiplication _pixelwise_mul_cell_to_output;
+ NEGEMMLowpOutputStage _cell_to_output_outstage;
+ NEArithmeticAddition _accumulate_cell_to_output;
+ NEActivationLayer _output_gate_sigmoid;
+ NEActivationLayer _hidden_tanh;
+ NEPixelWiseMultiplication _pixelwise_mul_hidden;
+ NEGEMMLowpOutputStage _hidden_outstage;
+ NEGEMMLowpMatrixMultiplyCore _mm_projection;
+ NEGEMMLowpOutputStage _projection_outstage;
+ NEArithmeticAddition _accumulate_projection;
+ NEActivationLayer _projection_clip;
- TensorCopyKernel _projection_bias_copy{};
- TensorCopyKernel _projection_output_to_accumulate_copy{};
- TensorCopyKernel _projection_accumulate_to_output_copy{};
- TensorCopyKernel _hidden_to_output_copy{};
+ TensorCopyKernel _projection_bias_copy;
+ TensorCopyKernel _projection_output_to_accumulate_copy;
+ TensorCopyKernel _projection_accumulate_to_output_copy;
+ TensorCopyKernel _hidden_to_output_copy;
- std::array<NEQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} };
+ std::array<std::unique_ptr<NEQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
- NECopyKernel _copy_output{};
+ NECopy _copy_output;
// Tensor pointers
const ITensor *_input_to_input_weights{ nullptr };
const ITensor *_recurrent_to_cell_weights{ nullptr };
const ITensor *_recurrent_to_output_weights{ nullptr };
const ITensor *_projection_weights{ nullptr };
- std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{ {} };
- std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{ {} };
+ std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{};
+ std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{};
using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type;
inline LayerNormIndexType getGateIndex(LayerNormGate g)
return _layer_norm_bias[getGateIndex(g)];
}
- inline NEQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)
+ inline std::unique_ptr<NEQLSTMLayerNormalizationKernel> &get_layer_norm(LayerNormGate g)
{
return _layer_norms[getGateIndex(g)];
}
- inline void configure_layer_norm(LayerNormGate g, const ITensor *in)
- {
- ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
-
- Tensor &out = get_layer_norm_output(g);
- _memory_group.manage(&out);
- out.allocator()->init(*(in->info()));
-
- get_layer_norm(g).configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
- }
-
- inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
- {
- // Output quantization scale will be different, but ignored here
- // since it will be configured at configure() stage.
- const TensorInfo out
- {
- in
- };
- return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
- }
+ void configure_layer_norm(LayerNormGate g, const ITensor *in);
+ static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);
// Temporary tensors
Tensor _input_to_forget_weights_transposed{ nullptr };
Tensor _projection_out_res{ nullptr };
Tensor _projection_accumulate_res{ nullptr };
Tensor _ones{ nullptr };
- std::array<Tensor, _layer_norm_count> _layer_norm_output{ {} };
+ std::array<Tensor, _layer_norm_count> _layer_norm_output{};
inline Tensor &get_layer_norm_output(LayerNormGate g)
{
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to simulate a quantization layer. This function calls the following NEON kernels:
*
class NEQuantizationLayer : public INESimpleFunctionNoBorder
{
public:
- /** Default constructor */
- NEQuantizationLayer() = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
#ifndef ARM_COMPUTE_NERNNLAYER_H
#define ARM_COMPUTE_NERNNLAYER_H
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
{
// Forward declarations
class ITensor;
+class NECopyKernel;
/** Basic function to run @ref NERNNLayer */
class NERNNLayer : public IFunction
NERNNLayer &operator=(const NERNNLayer &) = delete;
/** Default move assignment operator */
NERNNLayer &operator=(NERNNLayer &&) = default;
+ /** Default destructor */
+ ~NERNNLayer();
/** Initialize the function
*
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_state_f;
- NEArithmeticAddition _add_f;
- NEActivationLayer _activation;
- NEFullyConnectedLayer _fully_connected;
- NECopyKernel _copy_kernel;
- Tensor _fully_connected_out;
- Tensor _gemm_output;
- Tensor _add_output;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ NEGEMM _gemm_state_f;
+ NEArithmeticAddition _add_f;
+ NEActivationLayer _activation;
+ NEFullyConnectedLayer _fully_connected;
+ std::unique_ptr<NECopyKernel> _copy_kernel;
+ Tensor _fully_connected_out;
+ Tensor _gemm_output;
+ Tensor _add_output;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NERNNLAYER_H */
#ifndef ARM_COMPUTE_NEROIALIGNLAYER_H
#define ARM_COMPUTE_NEROIALIGNLAYER_H
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEROIAlignLayerKernel.
*
* -# @ref NEROIAlignLayerKernel
*
*/
-class NEROIAlignLayer : public INESimpleFunction
+class NEROIAlignLayer : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class NEROIPoolingLayerKernel;
+class ROIPoolingLayerInfo;
/** Basic function to run @ref NEROIPoolingLayerKernel.
*
public:
/** Constructor */
NEROIPoolingLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEROIPoolingLayer(const NEROIPoolingLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEROIPoolingLayer &operator=(const NEROIPoolingLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEROIPoolingLayer(NEROIPoolingLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEROIPoolingLayer &operator=(NEROIPoolingLayer &&) = delete;
+ /** Default destructor */
+ ~NEROIPoolingLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data types supported: F32.
void run() override;
private:
- NEROIPoolingLayerKernel _roi_kernel;
+ std::unique_ptr<NEROIPoolingLayerKernel> _roi_kernel;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEROIPOOLINGLAYER_H */
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NERANGE_H
#define ARM_COMPUTE_NERANGE_H
-#include "arm_compute/core/NEON/kernels/NERangeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NERangeKernel;
/** Basic function to run @ref NERangeKernel
*
public:
/** Default constructor */
NERange();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERange(const NERange &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERange &operator=(const NERange &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NERange(NERange &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NERange &operator=(NERange &&) = delete;
+ /** Default destructor */
+ ~NERange();
/** Initialize the kernel's start, end, step and output tensor.
*
* @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
void run() override;
private:
- NERangeKernel _kernel;
+ std::unique_ptr<NERangeKernel> _kernel;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NERANGE_H */
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
public:
/** Constructor */
NEReduceMean(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReduceMean(const NEReduceMean &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReduceMean &operator=(const NEReduceMean &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReduceMean(NEReduceMean &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEReduceMean &operator=(NEReduceMean &&) = delete;
+ /** Default destructor */
+ ~NEReduceMean();
/** Configure kernel
*
* @note Supported tensor rank: up to 4
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class NEReductionOperationKernel;
/** Basic function to simulate a reduction operation. This function calls the following NEON kernels:
*
public:
/** Default constructor */
NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReductionOperation(const NEReductionOperation &) = delete;
+ /** Default move constructor */
+ NEReductionOperation(NEReductionOperation &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReductionOperation &operator=(const NEReductionOperation &) = delete;
+ /** Default move assignment operator */
+ NEReductionOperation &operator=(NEReductionOperation &&) = default;
+ /** Default destructor */
+ ~NEReductionOperation();
/** Set the input and output tensors.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. (Written to only for border_size != 0)
void run() override;
private:
- MemoryGroup _memory_group;
- NEReductionOperationKernel _reduction_kernel;
- NEReshapeLayer _reshape;
- Tensor _output_internal;
- size_t _window_split;
- int _reduction_axis;
- bool _is_reshape_required;
+ MemoryGroup _memory_group;
+ std::unique_ptr<NEReductionOperationKernel> _reduction_kernel;
+ NEReshapeLayer _reshape;
+ Tensor _output_internal;
+ size_t _window_split;
+ int _reduction_axis;
+ bool _is_reshape_required;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEREDUCTIONOPERATION_H */
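Unlike most functions in this patch, NEReductionOperation keeps its move operations defaulted: every member, including the std::unique_ptr kernel, is movable, so only copying has to be forbidden. A compact sketch of that distinction with placeholder types:

// Sketch only: copyable vs movable with a unique_ptr member; placeholder types.
#include <memory>
#include <utility>

struct Kernel
{
};

struct MovableFunction
{
    MovableFunction() = default;
    MovableFunction(const MovableFunction &) = delete;             // unique_ptr cannot be copied
    MovableFunction &operator=(const MovableFunction &) = delete;
    MovableFunction(MovableFunction &&) = default;                 // but it can be moved
    MovableFunction &operator=(MovableFunction &&) = default;

    std::unique_ptr<Kernel> _kernel;
};

int main()
{
    MovableFunction a;
    a._kernel = std::make_unique<Kernel>();
    MovableFunction b = std::move(a);   // ownership of the kernel transfers; no copy is made
    return b._kernel == nullptr ? 0 : 1;
}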
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEReorgLayerKernel */
class NEReorgLayer : public INESimpleFunctionNoBorder
#ifndef ARM_COMPUTE_NERESHAPELAYER_H
#define ARM_COMPUTE_NERESHAPELAYER_H
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/INEOperator.h"
class NEReshape : public INEOperator
{
public:
+ /** Default Constructor */
+ NEReshape() = default;
+ /** Default Destructor */
+ ~NEReshape();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReshape(const NEReshape &) = delete;
+ /** Default move constructor */
+ NEReshape(NEReshape &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReshape &operator=(const NEReshape &) = delete;
+ /** Default move assignment operator */
+ NEReshape &operator=(NEReshape &&);
/** Initialise the kernel's inputs and outputs
*
* @param[in] input Input tensor info. Data type supported: All
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEReverseKernel */
class NEReverse : public INESimpleFunctionNoBorder
#ifndef ARM_COMPUTE_NESCALEIMAGE_H
#define ARM_COMPUTE_NESCALEIMAGE_H
-#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
+#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#define ARM_COMPUTE_NESELECT_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NESelect */
-class NESelect : public INESimpleFunction
+class NESelect : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs and output.
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NESOBEL5x5_H
#define ARM_COMPUTE_NESOBEL5x5_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
class ITensor;
+class NESobel5x5HorKernel;
+class NESobel5x5VertKernel;
+class NEFillBorderKernel;
/** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels:
*
public:
/** Default constructor */
NESobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel5x5(const NESobel5x5 &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel5x5 &operator=(const NESobel5x5 &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NESobel5x5(NESobel5x5 &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NESobel5x5 &operator=(NESobel5x5 &&) = delete;
+ /** Default destructor */
+ ~NESobel5x5();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function memory group */
- NESobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
- NESobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */
- Tensor _tmp_x; /**< Temporary buffer for Sobel X */
- Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
- NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */
+ MemoryGroup _memory_group; /**< Function memory group */
+ std::unique_ptr<NESobel5x5HorKernel> _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
+ std::unique_ptr<NESobel5x5VertKernel> _sobel_vert; /**< Sobel Vertical 5x5 kernel */
+ Tensor _tmp_x; /**< Temporary buffer for Sobel X */
+ Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
+ std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */
};
}
#endif /*ARM_COMPUTE_NESOBEL5x5_H */
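The hunk above replaces concrete kernel members with std::unique_ptr over forward-declared kernel types, which is why the destructor is now declared in the header and defined out of line: unique_ptr needs the complete type where the destructor is instantiated. A minimal, self-contained sketch of that idiom (all names are illustrative, not the library's):

#include <memory>

class Kernel; // forward declaration keeps the kernel header out of the function's interface

class Function
{
public:
    Function();
    ~Function(); // must be defined where Kernel is a complete type
private:
    std::unique_ptr<Kernel> _kernel;
};

// In the library this part lives in the .cpp file, after including the kernel header:
class Kernel
{
public:
    void run() {}
};

Function::Function() : _kernel(std::make_unique<Kernel>()) {}
Function::~Function() = default; // unique_ptr's deleter now sees the full type

int main()
{
    Function f;
    return 0;
}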
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NESOBEL7x7_H
#define ARM_COMPUTE_NESOBEL7x7_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
class ITensor;
+class NESobel7x7HorKernel;
+class NESobel7x7VertKernel;
+class NEFillBorderKernel;
/** Basic function to execute sobel 7x7 filter. This function calls the following NEON kernels:
*
public:
/** Default constructor */
NESobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel7x7(const NESobel7x7 &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel7x7 &operator=(const NESobel7x7 &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NESobel7x7(NESobel7x7 &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NESobel7x7 &operator=(NESobel7x7 &&) = delete;
+ /** Default destructor */
+ ~NESobel7x7();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function memory group */
- NESobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
- NESobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */
- Tensor _tmp_x; /**< Temporary buffer for Sobel X */
- Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
- NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */
+ MemoryGroup _memory_group; /**< Function memory group */
+ std::unique_ptr<NESobel7x7HorKernel> _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
+ std::unique_ptr<NESobel7x7VertKernel> _sobel_vert; /**< Sobel Vertical 7x7 kernel */
+ Tensor _tmp_x; /**< Temporary buffer for Sobel X */
+ Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
+ std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */
};
}
#endif /*ARM_COMPUTE_NESOBEL7x7_H */
#ifndef ARM_COMPUTE_NESOFTMAXLAYER_H
#define ARM_COMPUTE_NESOFTMAXLAYER_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class NELogits1DMaxKernel;
+template <bool IS_LOG>
+class NELogits1DSoftmaxKernel;
+class NEFillBorderKernel;
/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer.
*
NESoftmaxLayerGeneric &operator=(const NESoftmaxLayerGeneric &) = delete;
/** Default move assignment operator */
NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&) = default;
+ /** Default destructor */
+ ~NESoftmaxLayerGeneric();
/** Set the input and output tensors.
*
* @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. If the width is not a
void run() override;
private:
- MemoryGroup _memory_group;
- NEPermute _permute_input;
- NEPermute _permute_output;
- NELogits1DMaxKernel _max_kernel;
- NELogits1DSoftmaxKernel<IS_LOG> _softmax_kernel;
- NEFillBorderKernel _fill_border_kernel;
- Tensor _max;
- Tensor _tmp;
- Tensor _input_permuted;
- Tensor _output_permuted;
- bool _needs_permute;
+ MemoryGroup _memory_group;
+ NEPermute _permute_input;
+ NEPermute _permute_output;
+ std::unique_ptr<NELogits1DMaxKernel> _max_kernel;
+ std::unique_ptr<NELogits1DSoftmaxKernel<IS_LOG>> _softmax_kernel;
+ std::unique_ptr<NEFillBorderKernel> _fill_border_kernel;
+ Tensor _max;
+ Tensor _tmp;
+ Tensor _input_permuted;
+ Tensor _output_permuted;
+ bool _needs_permute;
};
using NESoftmaxLayer = NESoftmaxLayerGeneric<false>;
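NESoftmaxLayerGeneric selects the plain or logarithmic variant through the IS_LOG template parameter, and NESoftmaxLayer aliases the non-log one. A sketch of typical use through the function API, assuming the usual configure(input, output, beta, axis) signature and Tensor allocation flow; shapes and data types are illustrative:

#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(1000U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(1000U, 16U), 1, DataType::F32));

    NESoftmaxLayer softmax; // NESoftmaxLayerGeneric<false>
    softmax.configure(&src, &dst, 1.0f /* beta */, 0 /* axis */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    softmax.run(); // runs the max, softmax and any permute stages configured above
    return 0;
}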
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NESpaceToBatchLayerKernel;
+class NEMemsetKernel;
/** Basic function to spatially divide a tensor. This function calls the following NEON kernels/functions:
*
/** Allow instances of this class to be moved */
NESpaceToBatchLayer &operator=(NESpaceToBatchLayer &&) = default;
/** Default destructor */
- virtual ~NESpaceToBatchLayer() = default;
+ ~NESpaceToBatchLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
void run() override;
private:
- NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
- NEMemsetKernel _memset_kernel; /**< Memset kernel to run */
- bool _has_padding; /**< Flag to check if the output has padding */
+ std::unique_ptr<NESpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
+ std::unique_ptr<NEMemsetKernel> _memset_kernel; /**< Memset kernel to run */
+ bool _has_padding; /**< Flag to check if the output has padding */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NESPACETOBATCHLAYER_H */
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_NESPACETODEPTHLAYER_H
#define ARM_COMPUTE_NESPACETODEPTHLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NESpaceToDepthLayerKernel;
/** This function calls the following NEON kernels/functions:
*
/** Allow instances of this class to be moved */
NESpaceToDepthLayer &operator=(NESpaceToDepthLayer &&) = default;
/** Default destructor */
- virtual ~NESpaceToDepthLayer() = default;
+ ~NESpaceToDepthLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
void run() override;
private:
- NESpaceToDepthLayerKernel _space_to_depth_kernel; /**< SpaceToDepth kernel to run */
+ std::unique_ptr<NESpaceToDepthLayerKernel> _space_to_depth_kernel; /**< SpaceToDepth kernel to run */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NESPACETODEPTHLAYER_H */
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h"
-
#include <memory>
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
+class NEStackLayerKernel;
/** Basic function to stack tensors along an axis. This function calls the following kernel:
*
public:
/** Default constructor */
NEStackLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStackLayer(const NEStackLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStackLayer &operator=(const NEStackLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEStackLayer(NEStackLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEStackLayer &operator=(NEStackLayer &&) = delete;
+ /** Default destructor */
+ ~NEStackLayer();
/** Initialise the kernel's inputs vector and output.
*
* @note Supported input tensor rank: up to 4
void run() override;
private:
- std::vector<ITensor *> _input;
- std::vector<NEStackLayerKernel> _stack_kernels;
- unsigned int _num_inputs;
+ std::vector<ITensor *> _input;
+ std::vector<std::unique_ptr<NEStackLayerKernel>> _stack_kernels;
+ unsigned int _num_inputs;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NESTACKLAYER_H */
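Because NEStackLayerKernel is now only forward-declared, the kernels can no longer be held by value and the member becomes a vector of std::unique_ptr, one kernel per input. A small sketch of how such a vector might be populated during configuration; the kernel type and its interface here are stand-ins, not the library's:

#include <memory>
#include <vector>

struct Kernel
{
    void configure(int input_index) { index = input_index; }
    int  index = 0;
};

int main()
{
    const unsigned int num_inputs = 4;
    std::vector<std::unique_ptr<Kernel>> kernels;
    kernels.reserve(num_inputs);

    for(unsigned int i = 0; i < num_inputs; ++i)
    {
        auto k = std::make_unique<Kernel>();
        k->configure(static_cast<int>(i)); // one kernel per input tensor
        kernels.emplace_back(std::move(k));
    }
    return 0;
}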
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
{
// Forward declarations
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEThresholdKernel */
class NEThreshold : public INESimpleFunctionNoBorder
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NETileKernel */
class NETile : public INESimpleFunctionNoBorder
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel:
*
public:
/** Default constructor */
NEUnstack();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEUnstack(const NEUnstack &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEUnstack &operator=(const NEUnstack &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEUnstack(NEUnstack &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEUnstack &operator=(NEUnstack &&) = delete;
+ /** Default destructor */
+ ~NEUnstack() = default;
/** Set the input, output and unstacking axis.
*
* @param[in] input A tensor to be unstacked. Data type supported: All.
#ifndef ARM_COMPUTE_NEUPSAMPLELAYER_H
#define ARM_COMPUTE_NEUPSAMPLELAYER_H
-#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
+#include <memory>
+
namespace arm_compute
{
class ITensor;
+class NEUpsampleLayerKernel;
/** Function to run upsample layer */
class NEUpsampleLayer : public IFunction
public:
/** Constructor */
NEUpsampleLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEUpsampleLayer(const NEUpsampleLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEUpsampleLayer &operator=(const NEUpsampleLayer &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEUpsampleLayer(NEUpsampleLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEUpsampleLayer &operator=(NEUpsampleLayer &&) = delete;
+ /** Default destructor */
+ ~NEUpsampleLayer();
/** Set the input output tensors.
*
* @param[in] input Source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
void run() override;
private:
- NEUpsampleLayerKernel _kernel;
- DataLayout _data_layout;
+ std::unique_ptr<NEUpsampleLayerKernel> _kernel;
+ DataLayout _data_layout;
};
} // arm_compute
#endif /* ARM_COMPUTE_NEUPSAMPLELAYER_H */
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CPP/functions/CPPPermute.h"
#include "arm_compute/runtime/MemoryGroup.h"
{
// Forward declarations
class ITensor;
+class ICPPKernel;
/** Basic function to simulate a convolution layer. This function calls the following NEON kernels:
* -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method )
public:
/** Constructor */
NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = delete;
+ /** Default destructor */
+ ~NEWinogradConvolutionLayer() = default;
/** Set the input and output tensors.
*
NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete;
private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_function;
- std::unique_ptr<INEKernel> _transform_input_kernel;
- std::unique_ptr<INEKernel> _transform_output_kernel;
- std::unique_ptr<INEKernel> _transform_weights_kernel;
- NEActivationLayer _activationlayer_function;
+ MemoryGroup _memory_group;
+ NEGEMM _gemm_function;
+ std::unique_ptr<ICPPKernel> _transform_input_kernel;
+ std::unique_ptr<ICPPKernel> _transform_output_kernel;
+ std::unique_ptr<ICPPKernel> _transform_weights_kernel;
+ NEActivationLayer _activationlayer_function;
CPPPermute _permute_input;
CPPPermute _permute_weights;
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEYOLOLayerKernel */
class NEYOLOLayer : public INESimpleFunctionNoBorder
* @brief Folder containing all the GLES kernels
*/
-/** @dir arm_compute/core/NEON
+/** @dir src/core/NEON
* @brief NEON backend core: kernels and utilities.
*/
-/** @file arm_compute/core/NEON/NEKernels.h
+/** @file src/core/NEON/NEKernels.h
* @brief Includes all the NEON kernels at once
*/
-/** @dir arm_compute/core/NEON/kernels
+/** @dir src/core/NEON/kernels
* @brief Folder containing all the NEON kernels
*/
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
- INEKernel::configure(win);
+ ICPPKernel::configure(win);
}
void CPPCornerCandidatesKernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
Iterator input(_input, window);
execute_window_loop(window, [&](const Coordinates & id)
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_INEKERNEL_H
+#define ARM_COMPUTE_INEKERNEL_H
+
+#include "arm_compute/core/CPP/ICPPKernel.h"
+
+namespace arm_compute
+{
+/** Common interface for all kernels implemented in NEON. */
+using INEKernel = ICPPKernel;
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_INEKERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_INESIMPLEKERNEL_H
+#define ARM_COMPUTE_INESIMPLEKERNEL_H
+
+#include "arm_compute/core/CPP/ICPPSimpleKernel.h"
+
+namespace arm_compute
+{
+/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */
+using INESimpleKernel = ICPPSimpleKernel;
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEKERNELS_H
+#define ARM_COMPUTE_NEKERNELS_H
+
+/* Header regrouping all the NEON kernels */
+#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
+#include "src/core/NEON/kernels/NEAccumulateKernel.h"
+#include "src/core/NEON/kernels/NEActivationLayerKernel.h"
+#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h"
+#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
+#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseAndKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseNotKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseOrKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseXorKernel.h"
+#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
+#include "src/core/NEON/kernels/NEBox3x3Kernel.h"
+#include "src/core/NEON/kernels/NECannyEdgeKernel.h"
+#include "src/core/NEON/kernels/NEChannelCombineKernel.h"
+#include "src/core/NEON/kernels/NEChannelExtractKernel.h"
+#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
+#include "src/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/NEON/kernels/NEColorConvertKernel.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEConvolutionKernel.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NECropKernel.h"
+#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h"
+#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEDilateKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h"
+#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
+#include "src/core/NEON/kernels/NEErodeKernel.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
+#include "src/core/NEON/kernels/NEFastCornersKernel.h"
+#include "src/core/NEON/kernels/NEFillArrayKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "src/core/NEON/kernels/NEFloorKernel.h"
+#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
+#include "src/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEGatherKernel.h"
+#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h"
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h"
+#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
+#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h"
+#include "src/core/NEON/kernels/NEHOGDetectorKernel.h"
+#include "src/core/NEON/kernels/NEHarrisCornersKernel.h"
+#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEHistogramKernel.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEIntegralImageKernel.h"
+#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
+#include "src/core/NEON/kernels/NELKTrackerKernel.h"
+#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h"
+#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
+#include "src/core/NEON/kernels/NEMeanStdDevKernel.h"
+#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
+#include "src/core/NEON/kernels/NEMedian3x3Kernel.h"
+#include "src/core/NEON/kernels/NEMemsetKernel.h"
+#include "src/core/NEON/kernels/NEMinMaxLayerKernel.h"
+#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h"
+#include "src/core/NEON/kernels/NENonLinearFilterKernel.h"
+#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+#include "src/core/NEON/kernels/NENormalizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
+#include "src/core/NEON/kernels/NEPermuteKernel.h"
+#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
+#include "src/core/NEON/kernels/NEPoolingLayerKernel.h"
+#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h"
+#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
+#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h"
+#include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h"
+#include "src/core/NEON/kernels/NERangeKernel.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/NEON/kernels/NERemapKernel.h"
+#include "src/core/NEON/kernels/NEReorgLayerKernel.h"
+#include "src/core/NEON/kernels/NEReshapeLayerKernel.h"
+#include "src/core/NEON/kernels/NEReverseKernel.h"
+#include "src/core/NEON/kernels/NEScaleKernel.h"
+#include "src/core/NEON/kernels/NEScharr3x3Kernel.h"
+#include "src/core/NEON/kernels/NESelectKernel.h"
+#include "src/core/NEON/kernels/NESobel3x3Kernel.h"
+#include "src/core/NEON/kernels/NESobel5x5Kernel.h"
+#include "src/core/NEON/kernels/NESobel7x7Kernel.h"
+#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h"
+#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
+#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
+#include "src/core/NEON/kernels/NEStackLayerKernel.h"
+#include "src/core/NEON/kernels/NEStridedSliceKernel.h"
+#include "src/core/NEON/kernels/NETableLookupKernel.h"
+#include "src/core/NEON/kernels/NEThresholdKernel.h"
+#include "src/core/NEON/kernels/NETileKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
+#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h"
+#include "src/core/NEON/kernels/NEWarpKernel.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEYOLOLayerKernel.h"
+
+#endif /* ARM_COMPUTE_NEKERNELS_H */
*/
#include "arm_compute/core/TracePoint.h"
-#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
+#include "src/core/NEON/kernels/NELKTrackerKernel.h"
#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
#include "src/core/NEON/kernels/convolution/common/convolution.hpp"
#include "utils/TypePrinter.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
+#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H
+#define ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the absolute difference kernel
+ *
+ * Absolute difference is computed by:
+ * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
+ */
+class NEAbsoluteDifferenceKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEAbsoluteDifferenceKernel";
+ }
+ /** Default constructor */
+ NEAbsoluteDifferenceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default;
+ /** Default destructor */
+ ~NEAbsoluteDifferenceKernel() = default;
+
+ /** Set the inputs and output tensors
+ *
+ * @param[in] input1 Source tensor. Data types supported: U8/S16
+ * @param[in] input2 Source tensor. Data types supported: U8/S16
+ * @param[out] output Destination tensor. Data types supported: U8/S16
+ */
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised absolute difference functions
+ *
+ * @param[in] input1 An input tensor. Data types supported: U8/S16.
+ * @param[in] input2 An input tensor. Data types supported: U8/S16.
+ * @param[out] output The output tensor. Data types supported: U8 (only if both inputs are U8), S16.
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
+
+ /** Absolute difference function to use for the particular tensor formats passed to configure() */
+ AbsDiffFunction *_func;
+ const ITensor *_input1;
+ const ITensor *_input2;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H */
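The kernel above stores an AbsDiffFunction pointer: configure() picks a specialised routine for the tensor formats once, and run() only calls through the stored pointer. A self-contained sketch of that dispatch idiom with scalar routines standing in for the NEON ones (names and formats are illustrative):

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>

enum class Format { U8, S16 };

using AbsDiffFn = void (*)(const void *in1, const void *in2, void *out, size_t n);

static void abs_diff_u8(const void *in1, const void *in2, void *out, size_t n)
{
    auto a = static_cast<const uint8_t *>(in1);
    auto b = static_cast<const uint8_t *>(in2);
    auto o = static_cast<uint8_t *>(out);
    for(size_t i = 0; i < n; ++i)
    {
        o[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
    }
}

static void abs_diff_s16(const void *in1, const void *in2, void *out, size_t n)
{
    auto a = static_cast<const int16_t *>(in1);
    auto b = static_cast<const int16_t *>(in2);
    auto o = static_cast<int16_t *>(out);
    for(size_t i = 0; i < n; ++i)
    {
        o[i] = static_cast<int16_t>(std::abs(a[i] - b[i])); // the real kernel additionally saturates
    }
}

int main()
{
    Format fmt = Format::U8;
    AbsDiffFn func = (fmt == Format::U8) ? abs_diff_u8 : abs_diff_s16; // the "configure" step

    uint8_t a[4] = { 10, 200, 5, 7 }, b[4] = { 12, 100, 5, 9 }, out[4];
    func(a, b, out, 4);                                               // the "run" step
    std::cout << static_cast<int>(out[0]) << "\n";                    // prints 2
    return 0;
}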
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h"
+#include "src/core/NEON/kernels/NEAccumulateKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include <arm_neon.h>
-using namespace arm_compute;
-
namespace arm_compute
{
-class Coordinates;
-} // namespace arm_compute
-
/* Max S16 value used for saturation purposes. */
const static uint16x8_t max_int_u16 = vdupq_n_u16(static_cast<uint16_t>(INT16_MAX));
},
input, accum);
}
+} // namespace arm_compute
\ No newline at end of file
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEACCUMULATEKERNEL_H
+#define ARM_COMPUTE_NEACCUMULATEKERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the accumulate kernel
+ *
+ * Accumulation is computed by:
+ * @f[ accum(x,y) = accum(x,y) + input(x,y) @f]
+ */
+class NEAccumulateKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEAccumulateKernel";
+ }
+ /** Default constructor */
+ NEAccumulateKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateKernel(const NEAccumulateKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateKernel &operator=(const NEAccumulateKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEAccumulateKernel(NEAccumulateKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEAccumulateKernel &operator=(NEAccumulateKernel &&) = default;
+ /** Default destructor */
+ ~NEAccumulateKernel() = default;
+ /** Set the input and accumulation tensors
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] accum Destination tensor. Data type supported: S16.
+ */
+ void configure(const ITensor *input, ITensor *accum);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+};
+
+/** Interface for the accumulate weighted kernel
+ *
+ * Weighted accumulation is computed:
+ * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f]
+ *
+ * Where @f$ 0 \le \alpha \le 1 @f$
+ * Conceptually, the rounding for this is defined as:
+ * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f]
+*/
+class NEAccumulateWeightedKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEAccumulateWeightedKernel";
+ }
+ /** Default constructor */
+ NEAccumulateWeightedKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateWeightedKernel(const NEAccumulateWeightedKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateWeightedKernel &operator=(const NEAccumulateWeightedKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEAccumulateWeightedKernel(NEAccumulateWeightedKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEAccumulateWeightedKernel &operator=(NEAccumulateWeightedKernel &&) = default;
+ /** Default destructor */
+ ~NEAccumulateWeightedKernel() = default;
+ /** Set the input and accumulation tensors, and the scale value
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[in] alpha Scalar value in the range [0.0f, 1.0f]
+ * @param[in,out] accum Accumulated tensor. Data type supported: U8.
+ */
+ void configure(const ITensor *input, float alpha, ITensor *accum);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+protected:
+ float _alpha;
+};
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+/** Interface for the accumulate weighted kernel using F16 */
+class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEAccumulateWeightedFP16Kernel";
+ }
+ /** Default constructor */
+ NEAccumulateWeightedFP16Kernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateWeightedFP16Kernel(const NEAccumulateWeightedFP16Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateWeightedFP16Kernel &operator=(const NEAccumulateWeightedFP16Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEAccumulateWeightedFP16Kernel(NEAccumulateWeightedFP16Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEAccumulateWeightedFP16Kernel &operator=(NEAccumulateWeightedFP16Kernel &&) = default;
+ /** Default destructor */
+ ~NEAccumulateWeightedFP16Kernel() = default;
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+};
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+/** Interface for the accumulate weighted kernel using F16 */
+using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+/** Interface for the accumulate squared kernel
+ *
+ * The accumulation of squares is computed:
+ * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f]
+ *
+ * Where @f$ 0 \le shift \le 15 @f$
+*/
+class NEAccumulateSquaredKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEAccumulateSquaredKernel";
+ }
+ /** Default constructor */
+ NEAccumulateSquaredKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateSquaredKernel(const NEAccumulateSquaredKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEAccumulateSquaredKernel &operator=(const NEAccumulateSquaredKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEAccumulateSquaredKernel(NEAccumulateSquaredKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEAccumulateSquaredKernel &operator=(NEAccumulateSquaredKernel &&) = default;
+ /** Default destructor */
+ ~NEAccumulateSquaredKernel() = default;
+ /** Set the input and accumulation tensors and the shift value.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[in] shift Shift value in the range of [0, 15]
+ * @param[in,out] accum Accumulated tensor. Data type supported: S16.
+ */
+ void configure(const ITensor *input, uint32_t shift, ITensor *accum);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ uint32_t _shift;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEACCUMULATEKERNEL_H */
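A scalar reference for the weighted accumulation formula documented above, following the stated rounding path through float; this is only meant to make the formula concrete and is not the NEON implementation:

#include <cstdint>
#include <iostream>

static uint8_t accumulate_weighted(uint8_t accum, uint8_t input, float alpha)
{
    // accum = (1 - alpha) * accum + alpha * input, computed via float as per the doc formula
    const float result = (1.0f - alpha) * static_cast<float>(accum)
                       + alpha * static_cast<float>(input);
    return static_cast<uint8_t>(result);
}

int main()
{
    uint8_t accum = 100;
    accum = accumulate_weighted(accum, 200, 0.5f); // 0.5 * 100 + 0.5 * 200 = 150
    std::cout << static_cast<int>(accum) << "\n";
    return 0;
}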
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
+#include "src/core/NEON/kernels/NEActivationLayerKernel.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
+#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H
+
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/core/NEON/INEKernel.h"
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include <arm_fp16.h>
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the activation layer kernel. */
+class NEActivationLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEActivationLayerKernel";
+ }
+ /** Constructor */
+ NEActivationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEActivationLayerKernel(const NEActivationLayerKernel &) = delete;
+ /** Default move constructor */
+ NEActivationLayerKernel(NEActivationLayerKernel &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete;
+ /** Default move assignment operator */
+ NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEActivationLayerKernel() = default;
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+ * @param[out] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] activation_info Activation layer information.
+ */
+ void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
+ *
+ * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ ActivationLayerInfo _act_info;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */
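In the header above, configure() takes ITensorInfo pointers and execution goes through run_op() with an ITensorPack, so the actual tensors are supplied per invocation rather than bound at configure time. A simplified, self-contained sketch of that operator-style split; the lightweight types below are stand-ins for arm_compute's ITensorInfo/ITensorPack, not the real classes:

#include <iostream>
#include <map>
#include <vector>

enum TensorRole { SRC = 0, DST = 1 };

struct TensorInfoLite { size_t num_elements = 0; };
struct TensorLite     { std::vector<float> data; };
using  TensorPackLite = std::map<int, TensorLite *>;

class ReluKernelLite
{
public:
    void configure(const TensorInfoLite *input, const TensorInfoLite *output)
    {
        _elements = input->num_elements; // only metadata is seen here
        (void)output;                    // shape/type validation would go here
    }
    void run_op(TensorPackLite &tensors)
    {
        const TensorLite *src = tensors.at(SRC);
        TensorLite       *dst = tensors.at(DST);
        for(size_t i = 0; i < _elements; ++i)
        {
            dst->data[i] = src->data[i] > 0.0f ? src->data[i] : 0.0f; // ReLU
        }
    }
private:
    size_t _elements = 0;
};

int main()
{
    TensorInfoLite info{ 4 };
    TensorLite src{ { -1.0f, 2.0f, -3.0f, 4.0f } };
    TensorLite dst{ std::vector<float>(4) };

    ReluKernelLite kernel;
    kernel.configure(&info, &info);          // metadata only

    TensorPackLite pack{ { SRC, &src }, { DST, &dst } };
    kernel.run_op(pack);                     // tensors supplied at run time
    std::cout << dst.data[1] << "\n";        // prints 2
    return 0;
}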
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
+#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H
+#define ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform addition between two tensors */
+class NEArithmeticAdditionKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEArithmeticAdditionKernel";
+ }
+ /** Default constructor */
+ NEArithmeticAdditionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default;
+ /** Default destructor */
+ ~NEArithmeticAdditionKernel() = default;
+
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ *
+ * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+ * @param[in] policy Overflow policy.
+ */
+ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel
+ *
+ * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+ * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+ * @param[in] policy Overflow policy.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised add functions
+ *
+ * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
+ * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32.
+ * @param[in] policy Overflow policy.
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window);
+ /** Add function to use for the particular tensor types passed to configure() */
+ AddFunction *_func;
+ ConvertPolicy _policy;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */
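The static validate() lets callers check a candidate configuration against the (Input1, Input2) -> Output table above using only tensor metadata. A hedged usage sketch; the include paths and the TensorInfo/Status usage follow the usual arm_compute conventions and should be treated as assumptions, since this header now lives under src/ and is internal:

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include <iostream>

using namespace arm_compute;

int main()
{
    const TensorInfo a(TensorShape(32U, 16U), 1, DataType::QASYMM8);
    const TensorInfo b(TensorShape(32U, 16U), 1, DataType::QASYMM8);
    const TensorInfo out(TensorShape(32U, 16U), 1, DataType::QASYMM8);

    const Status s  = NEArithmeticAdditionKernel::validate(&a, &b, &out, ConvertPolicy::SATURATE);
    const bool   ok = (s.error_code() == ErrorCode::OK);
    std::cout << (ok ? "valid configuration" : s.error_description()) << "\n";
    return ok ? 0 : 1;
}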
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
+#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H
+#define ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform subtraction between two tensors */
+class NEArithmeticSubtractionKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEArithmeticSubtractionKernel";
+ }
+ /** Default constructor */
+ NEArithmeticSubtractionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default;
+ /** Default destructor */
+ ~NEArithmeticSubtractionKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (QASYMM8, QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ *
+ * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32.
+ * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized.
+ */
+ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (QASYMM8, QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ *
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32.
+ * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised sub functions
+ *
+ * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32.
+ * @param[in] window Region on which to execute the kernel.
+ * @param[in] is_sat Flag to indicate if the policy is SATURATE.
+ */
+ using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat);
+ /** Sub function to use for the particular tensor types passed to configure() */
+ SubFunction *_func;
+ ConvertPolicy _policy;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H
+#define ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the batch concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class NEBatchConcatenateLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBatchConcatenateLayerKernel";
+ }
+ /** Default constructor */
+ NEBatchConcatenateLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEBatchConcatenateLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input Input tensor info. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
+ *
+     * @note The output tensor's two lowest dimensions can't be smaller than the input's.
+     * @note The difference between each of the two lowest dimensions of input and output must be divisible by 2.
+ *
+ */
+ void configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window);
+
+private:
+ BatchConcatFunction *_func;
+ unsigned int _batch_offset;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H */
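
Since this kernel is configured on ITensorInfo objects and executed through run_op() with an ITensorPack, a quick way to illustrate the contract is the static validate() check below. The shapes and batch_offset are illustrative only; the kernel is normally driven by the NEConcatenateLayer runtime function rather than used directly.

#include "arm_compute/core/TensorInfo.h"
#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"

// Concatenate a [8,8,3,2] block into a [8,8,3,6] output, writing at batch offset 2.
arm_compute::Status batch_concat_check()
{
    using namespace arm_compute;
    const TensorInfo in(TensorShape(8U, 8U, 3U, 2U), 1, DataType::F32);
    const TensorInfo out(TensorShape(8U, 8U, 3U, 6U), 1, DataType::F32);
    return NEBatchConcatenateLayerKernel::validate(&in, 2U /* batch_offset */, &out);
}
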
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the batch normalization layer kernel.
+ */
+class NEBatchNormalizationLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBatchNormalizationLayerKernel";
+ }
+ /** Default constructor */
+ NEBatchNormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete;
+    /** Default move constructor */
+ NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default;
+ /** Default move assignment operator */
+ NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEBatchNormalizationLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
+ * 3 lower dimensions represent a single input with dimensions [width, height, FM].
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      beta     (Optional) Beta values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
+     * @param[in]      gamma    (Optional) Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
+ * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ */
+ void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f,
+ ActivationLayerInfo act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayerKernel
+ *
+ * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result.
+ * 3 lower dimensions represent a single input with dimensions [width, height, FM].
+ * The rest are optional and used for representing batches. Data types supported: F16/F32.
+ * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
+ * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
+ * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *mean, const ITensorInfo *var,
+ const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
+ float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Configure execution function in case of non-fused activation **/
+ void configure_non_fused();
+ /** Configure execution function in case of fused activation **/
+ void configure_fused();
+
+    /** Template function to run batch normalization on tensors with NCHW format
+     *
+     * @tparam T                Specialization data type
+     * @tparam fused_activation Boolean that flags if it's a fused activation or not
+ * @tparam F Activation function functor to run
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T, bool fused_activation, typename F>
+ void batch_normalization_nchw(const Window &window);
+    /** Template function to run batch normalization on tensors with NHWC format
+     *
+     * @tparam T                Specialization data type
+     * @tparam fused_activation Boolean that flags if it's a fused activation or not
+ * @tparam F Activation function functor to run
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T, bool fused_activation, typename F>
+ void batch_normalization_nhwc(const Window &window);
+ /** Common signature for all the batch normalization functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using BatchNormFunctionPtr = void (NEBatchNormalizationLayerKernel::*)(const Window &window);
+
+private:
+ BatchNormFunctionPtr _func;
+ ITensor *_input;
+ ITensor *_output;
+ const ITensor *_mean;
+ const ITensor *_var;
+ const ITensor *_gamma;
+ const ITensor *_beta;
+ float _epsilon;
+ ActivationLayerInfo _act_info;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H */
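
As a sketch of the in-place path described in configure() (output == nullptr) together with a fused RELU, the snippet below wires the kernel directly and runs it through the scheduler. The epsilon and activation choice are illustrative; production code would normally go through NEBatchNormalizationLayer.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"

// In-place batch normalization with a fused RELU; beta/gamma take their defaults (0 and 1).
void batch_norm_inplace(arm_compute::Tensor &src, arm_compute::Tensor &mean, arm_compute::Tensor &var)
{
    using namespace arm_compute;
    NEBatchNormalizationLayerKernel bn;
    bn.configure(&src, nullptr /* in-place */, &mean, &var,
                 nullptr /* beta */, nullptr /* gamma */, 0.001f,
                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    NEScheduler::get().schedule(&bn, Window::DimY);
}
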
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
+#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H
+#define ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the batch to space kernel */
+class NEBatchToSpaceLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBatchToSpaceLayerKernel";
+ }
+ /** Default constructor */
+ NEBatchToSpaceLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchToSpaceLayerKernel(const NEBatchToSpaceLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBatchToSpaceLayerKernel &operator=(const NEBatchToSpaceLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEBatchToSpaceLayerKernel(NEBatchToSpaceLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEBatchToSpaceLayerKernel &operator=(NEBatchToSpaceLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEBatchToSpaceLayerKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const ITensor *input, const ITensor *block_shape, ITensor *output);
+ /** Initialise the kernel's inputs and output (Static block shape).
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[in] output Tensor output. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape).
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[in] output Tensor output. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input; /**< Source tensor */
+ const ITensor *_block_shape; /**< Block shape tensor */
+ ITensor *_output; /**< Destination tensor */
+ DataLayout _data_layout; /**< Data layout to be used at run-time */
+
+ int32_t _block_shape_x;
+ int32_t _block_shape_y;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H */
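
The static block shape overload is the simpler of the two configure() variants; the sketch below assumes a 2x2 block and tensors that have already been allocated with compatible shapes. NEBatchToSpaceLayer is the usual entry point for this kernel.

#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"

// Rearrange batch elements into spatial blocks of 2x2.
void batch_to_space_2x2(arm_compute::Tensor &src, arm_compute::Tensor &dst)
{
    using namespace arm_compute;
    NEBatchToSpaceLayerKernel b2s;
    b2s.configure(&src, 2 /* block_shape_x */, 2 /* block_shape_y */, &dst);
    NEScheduler::get().schedule(&b2s, Window::DimY);
}
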
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseAndKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEBITWISEANDKERNEL_H
+#define ARM_COMPUTE_NEBITWISEANDKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f]
+ */
+class NEBitwiseAndKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBitwiseAndKernel";
+ }
+ /** Default constructor */
+ NEBitwiseAndKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default;
+ /** Default destructor */
+ ~NEBitwiseAndKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input1 An input tensor. Data type supported: U8.
+ * @param[in] input2 An input tensor. Data type supported: U8
+ * @param[out] output Output tensor. Data type supported: U8.
+ */
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input1; /**< Source tensor 1 */
+ const ITensor *_input2; /**< Source tensor 2 */
+ ITensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEBITWISEANDKERNEL_H */
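
The four bitwise kernels (AND here, NOT, OR and XOR below) share the same shape of API, so one sketch covers the family: allocate U8 tensors, configure, and hand the kernel to the scheduler. Shapes are illustrative, and NEBitwiseAnd and its siblings are the runtime wrappers normally used instead of direct kernel calls.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEBitwiseAndKernel.h"

// Element-wise AND of two U8 planes.
void bitwise_and_u8()
{
    using namespace arm_compute;
    const TensorInfo info(TensorShape(64U, 64U), 1, DataType::U8);
    Tensor a, b, out;
    a.allocator()->init(info);
    b.allocator()->init(info);
    out.allocator()->init(info);
    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();

    NEBitwiseAndKernel and_kernel;
    and_kernel.configure(&a, &b, &out);
    NEScheduler::get().schedule(&and_kernel, Window::DimY);
}
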
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseNotKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEBITWISENOTKERNEL_H
+#define ARM_COMPUTE_NEBITWISENOTKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform bitwise NOT operation
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = \lnot input(x,y) @f]
+ */
+class NEBitwiseNotKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBitwiseNotKernel";
+ }
+ /** Default constructor */
+ NEBitwiseNotKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default;
+ /** Default destructor */
+ ~NEBitwiseNotKernel() = default;
+ /** Initialise the kernel's input and output
+ *
+ * @param[in] input An input tensor. Data type supported: U8.
+ * @param[out] output The output tensor. Data type supported: U8.
+ */
+ void configure(const ITensor *input, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input; /**< Source tensor */
+ ITensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEBITWISENOTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseOrKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEBITWISEORKERNEL_H
+#define ARM_COMPUTE_NEBITWISEORKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform bitwise inclusive OR between two tensors
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f]
+ */
+class NEBitwiseOrKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBitwiseOrKernel";
+ }
+ /** Default constructor */
+ NEBitwiseOrKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default;
+ /** Default destructor */
+ ~NEBitwiseOrKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input1 An input tensor. Data type supported: U8.
+ * @param[in] input2 An input tensor. Data type supported: U8
+ * @param[out] output Output tensor. Data type supported: U8.
+ */
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input1; /**< Source tensor 1 */
+ const ITensor *_input2; /**< Source tensor 2 */
+ ITensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEBITWISEORKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseXorKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEBITWISEXORKERNEL_H
+#define ARM_COMPUTE_NEBITWISEXORKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f]
+ */
+class NEBitwiseXorKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBitwiseXorKernel";
+ }
+ /** Default constructor */
+ NEBitwiseXorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default;
+ /** Default destructor */
+ ~NEBitwiseXorKernel() = default;
+    /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input1 An input tensor. Data type supported: U8.
+ * @param[in] input2 An input tensor. Data type supported: U8
+ * @param[out] output The output tensor. Data type supported: U8.
+ */
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input1; /**< Source tensor 1 */
+ const ITensor *_input2; /**< Source tensor 2 */
+ ITensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEBITWISEXORKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
+#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H
+#define ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the bounding box kernel */
+class NEBoundingBoxTransformKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBoundingBoxTransformKernel";
+ }
+
+ /** Default constructor */
+ NEBoundingBoxTransformKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBoundingBoxTransformKernel(const NEBoundingBoxTransformKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBoundingBoxTransformKernel &operator=(const NEBoundingBoxTransformKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEBoundingBoxTransformKernel(NEBoundingBoxTransformKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEBoundingBoxTransformKernel &operator=(NEBoundingBoxTransformKernel &&) = default;
+ /** Default destructor */
+ ~NEBoundingBoxTransformKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
+     * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
+     * @param[in]  deltas     Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
+     *                        Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes.
+ * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
+ *
+ * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+ *
+ */
+ void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info);
+
+    /** Static function to check if given info will lead to a valid configuration of @ref NEBoundingBoxTransformKernel
+ *
+ * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
+     * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
+     * @param[in] deltas     Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
+     *                       Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes.
+ * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
+ *
+ * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ template <typename T>
+ void internal_run(const Window &window);
+
+ const ITensor *_boxes;
+ ITensor *_pred_boxes;
+ const ITensor *_deltas;
+ BoundingBoxTransformInfo _bbinfo;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H */
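
A minimal validate() sketch for the float path is below. The (4, M) / (4*K, M) shape layout and the (img_width, img_height, scale) argument order of BoundingBoxTransformInfo are assumptions made for illustration, not documented guarantees.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h"

// Check a transform of M proposals against K classes, all in F32.
arm_compute::Status bbox_transform_check(unsigned int M, unsigned int K)
{
    using namespace arm_compute;
    const TensorInfo boxes(TensorShape(4U, M), 1, DataType::F32);
    const TensorInfo deltas(TensorShape(4U * K, M), 1, DataType::F32);
    const TensorInfo pred_boxes(TensorShape(4U * K, M), 1, DataType::F32);
    const BoundingBoxTransformInfo info(800.f /* img_width */, 600.f /* img_height */, 1.f /* scale */);
    return NEBoundingBoxTransformKernel::validate(&boxes, &pred_boxes, &deltas, info);
}
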
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h"
+#include "src/core/NEON/kernels/NEBox3x3Kernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEBOX3x3KERNEL_H
+#define ARM_COMPUTE_NEBOX3x3KERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a Box 3x3 filter */
+class NEBox3x3Kernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBox3x3Kernel";
+ }
+ /** Default constructor */
+ NEBox3x3Kernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBox3x3Kernel(const NEBox3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBox3x3Kernel &operator=(const NEBox3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEBox3x3Kernel(NEBox3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEBox3x3Kernel &operator=(NEBox3x3Kernel &&) = default;
+ /** Default destructor */
+ ~NEBox3x3Kernel() = default;
+ /** Set the source, destination and border mode of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output Destination tensor. Data type supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+};
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+/** NEON kernel to perform a Box 3x3 filter for FP16 datatype
+ */
+class NEBox3x3FP16Kernel : public NEBox3x3Kernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEBox3x3FP16Kernel";
+ }
+ /** Default constructor */
+ NEBox3x3FP16Kernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBox3x3FP16Kernel(const NEBox3x3FP16Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEBox3x3FP16Kernel &operator=(const NEBox3x3FP16Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEBox3x3FP16Kernel(NEBox3x3FP16Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEBox3x3FP16Kernel &operator=(NEBox3x3FP16Kernel &&) = default;
+ /** Default destructor */
+ ~NEBox3x3FP16Kernel() = default;
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+};
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */
+using NEBox3x3FP16Kernel = NEBox3x3Kernel;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEBOX3x3KERNEL_H */
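
Because configure() only takes the two tensors and a border flag, a direct-use sketch is short. Here border_undefined is false, which assumes the caller has already filled the one-pixel border (e.g. with a fill-border kernel); NEBox3x3 is the runtime wrapper that takes care of this.

#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEBox3x3Kernel.h"

// 3x3 box filter over a U8 image whose border has been filled beforehand.
void box_filter_u8(arm_compute::Tensor &src, arm_compute::Tensor &dst)
{
    using namespace arm_compute;
    NEBox3x3Kernel box;
    box.configure(&src, &dst, false /* border_undefined */);
    NEScheduler::get().schedule(&box, Window::DimY);
}
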
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"
+#include "src/core/NEON/kernels/NECannyEdgeKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include <cstdint>
#include <tuple>
-using namespace arm_compute;
-
namespace arm_compute
{
-class Coordinates;
-} // namespace arm_compute
-
namespace
{
constexpr int NO_EDGE = 0;
constexpr int EDGE = 255;
constexpr int MAYBE = 127;
-} // namespace
-namespace
-{
inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)
{
// Constant use for evaluating score1 and score3
}
} // namespace
+NEGradientKernel::~NEGradientKernel() = default;
+
NEGradientKernel::NEGradientKernel()
: _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr)
{
gx, gy, magnitude, phase);
}
+NEEdgeNonMaxSuppressionKernel::~NEEdgeNonMaxSuppressionKernel() = default;
NEEdgeNonMaxSuppressionKernel::NEEdgeNonMaxSuppressionKernel()
: _func(nullptr), _magnitude(nullptr), _phase(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0)
{
magnitude, phase, output);
}
+NEEdgeTraceKernel::~NEEdgeTraceKernel() = default;
NEEdgeTraceKernel::NEEdgeTraceKernel()
: _input(nullptr), _output(nullptr)
{
},
input, output);
}
+} // namespace arm_compute
\ No newline at end of file
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECANNYEDGEKERNEL_H
+#define ARM_COMPUTE_NECANNYEDGEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Computes magnitude and quantised phase from inputs gradients. */
+class NEGradientKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGradientKernel";
+ }
+ /** Default constructor */
+ NEGradientKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGradientKernel(const NEGradientKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGradientKernel &operator=(const NEGradientKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGradientKernel(NEGradientKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGradientKernel &operator=(NEGradientKernel &&) = default;
+ /** Default destructor */
+ ~NEGradientKernel();
+
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+     * @note gx, gy and magnitude must all use the same element size (either 16-bit or 32-bit)
+ *
+ * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32.
+ * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx.
+ * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32).
+ * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8.
+ * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm
+ */
+ virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+protected:
+ /** Common signature for all the specialised gradient functions
+ *
+ * @param[in] gx_ptr Pointer to the first input tensor.
+ * @param[in] gy_ptr Pointer to the second input tensor.
+ * @param[out] magnitude_ptr Pointer to the first output tensor
+ * @param[out] phase_ptr Pointer to the second output tensor
+ */
+ using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr);
+
+ GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */
+ const ITensor *_gx; /**< Source tensor - Gx component */
+ const ITensor *_gy; /**< Source tensor - Gy component */
+ ITensor *_magnitude; /**< Destination tensor - Magnitude */
+ ITensor *_phase; /**< Destination tensor - Quantized phase */
+};
+
+/** NEON kernel to perform Non-Maxima suppression for Canny Edge.
+ *
+ * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
+ * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE.
+ *
+ * @note Hysteresis is computed in @ref NEEdgeTraceKernel
+ */
+class NEEdgeNonMaxSuppressionKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEEdgeNonMaxSuppressionKernel";
+ }
+ /** Default constructor */
+ NEEdgeNonMaxSuppressionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default;
+ /** Default destructor */
+ ~NEEdgeNonMaxSuppressionKernel();
+
+ /** Initialise the kernel's sources, destination and border mode.
+ *
+ * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32.
+ * @param[in] phase Source tensor - Quantized phase. Data type supported: U8.
+ * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge"
+ * @param[in] upper_thr Upper threshold used for the hysteresis
+ * @param[in] lower_thr Lower threshold used for the hysteresis
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Common signature for all the specialised non-maxima suppression functions
+ *
+ * @param[in] magnitude_ptr Pointer to the first input tensor.
+ * @param[in] phase_ptr Pointer to the second input tensor.
+ * @param[out] output_ptr Pointer to the output tensor
+ * @param[in] stride_mag Stride of the magnitude tensor
+ * @param[in] upper_thr Upper threshold used for the hysteresis
+ * @param[in] lower_thr Lower threshold used for the hysteresis
+ */
+ using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr,
+ const int32_t lower_thr);
+
+ EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */
+ const ITensor *_magnitude; /**< Source tensor - Magnitude */
+ const ITensor *_phase; /**< Source tensor - Quantized phase */
+ ITensor *_output; /**< Destination tensor */
+ int32_t _lower_thr; /**< Lower threshold used for the hysteresis */
+ int32_t _upper_thr; /**< Upper threshold used for the hysteresis */
+};
+
+/** NEON kernel to perform Edge tracing */
+class NEEdgeTraceKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEEdgeTraceKernel";
+ }
+ /** Default constructor */
+ NEEdgeTraceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default;
+ /** Default destructor */
+ ~NEEdgeTraceKernel();
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge"
+ * @param[in,out] output Destination tensor. Data type supported: U8. Must be initialized to 0 (No edge).
+ */
+ void configure(ITensor *input, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+ bool is_parallelisable() const override;
+
+private:
+ ITensor *_input; /**< Source tensor */
+ ITensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NECANNYEDGEKERNEL_H */
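
The three kernels above form the Canny pipeline: gradient, then non-maxima suppression, then edge tracing. The sketch below strings them together directly, assuming the caller provides correctly typed and allocated tensors (S16 gradients, U16 magnitude, U8 phase and outputs, with the edge output zero-initialised) and that borders have been handled; the NECannyEdge runtime function is the supported way to run the full pipeline.

#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NECannyEdgeKernel.h"

// Run the three Canny stages back to back. Thresholds and the L1 norm are illustrative.
void canny_stages(arm_compute::Tensor &gx, arm_compute::Tensor &gy,
                  arm_compute::Tensor &mag, arm_compute::Tensor &phase,
                  arm_compute::Tensor &suppressed, arm_compute::Tensor &edges)
{
    using namespace arm_compute;

    NEGradientKernel gradient;
    gradient.configure(&gx, &gy, &mag, &phase, 1 /* L1 norm */);
    NEScheduler::get().schedule(&gradient, Window::DimY);

    NEEdgeNonMaxSuppressionKernel non_max;
    non_max.configure(&mag, &phase, &suppressed, 120 /* upper_thr */, 50 /* lower_thr */, false);
    NEScheduler::get().schedule(&non_max, Window::DimY);

    NEEdgeTraceKernel trace;
    trace.configure(&suppressed, &edges); // edges must start filled with 0 (no edge)
    NEScheduler::get().schedule(&trace, Window::DimY);
}
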
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h"
+#include "src/core/NEON/kernels/NEChannelCombineKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H
+#define ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+#include <array>
+#include <cstdint>
+
+namespace arm_compute
+{
+class IMultiImage;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the channel combine kernel */
+class NEChannelCombineKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEChannelCombineKernel";
+ }
+ /** Default constructor */
+ NEChannelCombineKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEChannelCombineKernel(const NEChannelCombineKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEChannelCombineKernel(NEChannelCombineKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default;
+ /** Default destructor */
+ ~NEChannelCombineKernel() = default;
+
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
+ * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
+ * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
+ * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8
+ * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+ */
+ void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output);
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8
+ * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8
+ * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8
+ * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444
+ */
+ void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ bool is_parallelisable() const override;
+
+private:
+ /** Combine 3 planes to form a three channel single plane tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ void combine_3C(const Window &win);
+ /** Combine 4 planes to form a four channel single plane tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ void combine_4C(const Window &win);
+ /** Combine 3 planes to form a single plane YUV tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ template <bool is_yuyv>
+ void combine_YUV_1p(const Window &win);
+ /** Combine 3 planes to form a two plane YUV tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ void combine_YUV_2p(const Window &win);
+ /** Combine 3 planes to form a three plane YUV tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ void combine_YUV_3p(const Window &win);
+ /** Copies a full plane to the output tensor.
+ *
+     * @param[in] win      Region on which to execute the kernel.
+     * @param[in] plane_id Index of the input plane to copy.
+     */
+ void copy_plane(const Window &win, uint32_t plane_id);
+ /** Common signature for all the specialised ChannelCombine functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window);
+ /** ChannelCombine function to use for the particular tensor types passed to configure() */
+ ChannelCombineFunction _func;
+ std::array<const ITensor *, 4> _planes;
+ ITensor *_output;
+ IMultiImage *_output_multi;
+ std::array<uint32_t, 3> _x_subsampling;
+ std::array<uint32_t, 3> _y_subsampling;
+ unsigned int _num_elems_processed_per_iteration;
+ bool _is_parallelizable;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H */
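
The single-plane overload of configure() is the easiest to show. The sketch below interleaves four U8 planes into an RGBA8888 image; plane sizes, the output format and the use of the kernel outside NEChannelCombine are illustrative assumptions.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEChannelCombineKernel.h"

// Interleave four U8 planes into one RGBA8888 image.
void combine_rgba(arm_compute::Tensor &r, arm_compute::Tensor &g, arm_compute::Tensor &b, arm_compute::Tensor &a,
                  unsigned int width, unsigned int height)
{
    using namespace arm_compute;
    Tensor rgba;
    rgba.allocator()->init(TensorInfo(width, height, Format::RGBA8888));
    rgba.allocator()->allocate();

    NEChannelCombineKernel combine;
    combine.configure(&r, &g, &b, &a, &rgba);
    NEScheduler::get().schedule(&combine, Window::DimY);
}
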
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h"
+#include "src/core/NEON/kernels/NEChannelExtractKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IMultiImage.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/MultiImageInfo.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H
+#define ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class IMultiImage;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the channel extract kernel */
+class NEChannelExtractKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEChannelExtractKernel";
+ }
+ /** Default constructor */
+ NEChannelExtractKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEChannelExtractKernel(const NEChannelExtractKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEChannelExtractKernel(NEChannelExtractKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default;
+ /** Default destructor */
+ ~NEChannelExtractKernel() = default;
+
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
+ * @param[in] channel Channel to extract.
+ * @param[out] output Destination tensor. Format supported: U8
+ */
+ void configure(const ITensor *input, Channel channel, ITensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
+ * @param[in] channel Channel to extract.
+ * @param[out] output Single-planar destination image. Format supported: U8
+ */
+ void configure(const IMultiImage *input, Channel channel, IImage *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Extract one channel from a two channel planar tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ void extract_1C_from_2C_img(const Window &win);
+ /** Extract one channel from a three channel planar tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ void extract_1C_from_3C_img(const Window &win);
+ /** Extract one channel from a four channel planar tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ void extract_1C_from_4C_img(const Window &win);
+ /** Extract U/V channel from a single planar YUYV/UYVY tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ void extract_YUYV_uv(const Window &win);
+ /** Copies a full plane to the output tensor.
+ *
+ * @param[in] win Region on which to execute the kernel.
+ */
+ void copy_plane(const Window &win);
+ /** Common signature for all the specialised ChannelExtract functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window);
+ /** ChannelExtract function to use for the particular tensor types passed to configure() */
+ ChannelExtractFunction _func;
+ unsigned int _lut_index;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H */
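As a counterpart to the combine sketch above, the interleaved-to-planar direction can be written in scalar form: picking one byte out of every RGB888 triplet yields the U8 destination plane described in configure(). The NEON kernel performs the same operation with vectorised loads; the function below is only an illustration.

#include <cstdint>
#include <cstddef>

// Extract one channel (0 = R, 1 = G, 2 = B) from an interleaved RGB888 buffer into a U8 plane.
void extract_channel_rgb888(const uint8_t *src, uint8_t *dst, size_t num_pixels, unsigned int channel)
{
    for(size_t i = 0; i < num_pixels; ++i)
    {
        dst[i] = src[i * 3 + channel]; // one byte per pixel from the interleaved triplet
    }
}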
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
+#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H
+#define ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the channel shuffle kernel */
+class NEChannelShuffleLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEChannelShuffleLayerKernel";
+ }
+ /** Default constructor */
+ NEChannelShuffleLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEChannelShuffleLayerKernel(const NEChannelShuffleLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEChannelShuffleLayerKernel &operator=(const NEChannelShuffleLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEChannelShuffleLayerKernel(NEChannelShuffleLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEChannelShuffleLayerKernel &operator=(NEChannelShuffleLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEChannelShuffleLayerKernel() = default;
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+ */
+ void configure(const ITensor *input, ITensor *output, unsigned int num_groups);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ unsigned int _num_groups;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H */
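The shuffle itself is the ShuffleNet-style group transpose; assuming that interpretation, a scalar NCHW reference looks as follows (names and layout here are illustrative assumptions, not the kernel's internals).

#include <cstddef>
#include <vector>

// Reference channel shuffle on a plane-major (NCHW-style) buffer: view the channels as a
// [num_groups, channels_per_group] matrix and read it transposed.
std::vector<float> channel_shuffle_ref(const std::vector<float> &in, size_t num_channels, size_t plane_size, size_t num_groups)
{
    const size_t channels_per_group = num_channels / num_groups; // configure() requires this to divide exactly
    std::vector<float> out(in.size());
    for(size_t c = 0; c < num_channels; ++c)
    {
        const size_t src_c = (c % num_groups) * channels_per_group + c / num_groups;
        for(size_t i = 0; i < plane_size; ++i)
        {
            out[c * plane_size + i] = in[src_c * plane_size + i];
        }
    }
    return out;
}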
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/NEON/kernels/NECol2ImKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECOL2IMKERNEL_H
+#define ARM_COMPUTE_NECOL2IMKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+#include "arm_compute/core/Size2D.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform col2im reshaping.
+ *
+ * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel.
+ *
+ * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
+ *
+ * @f[
+ * \left( \begin{array}{ccccccccc}
+ * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccc}
+ * a0 & a1 & a2 \\
+ * a3 & a4 & a5 \\
+ * a6 & a7 & a8 \\
+ * \end{array} \right)
+ * @f]
+ */
+class NECol2ImKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NECol2ImKernel";
+ }
+ /** Default constructor */
+ NECol2ImKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECol2ImKernel(const NECol2ImKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECol2ImKernel &operator=(const NECol2ImKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NECol2ImKernel(NECol2ImKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NECol2ImKernel &operator=(NECol2ImKernel &&) = default;
+ /** Default destructor */
+ ~NECol2ImKernel() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. Data types supported: All
+ * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+ * while the rest represent batch of outputs. Data types supported: Same as @p input
+ * @param[in] convolved_dims Output convolved dimensions.
+ */
+ void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
+ /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel
+ *
+ * @param[in] input The input tensor to convert. Data types supported: All
+ * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+ * while the rest represent batch of outputs. Data types supported: Same as @p input
+ * @param[in] convolved_dims Output convolved dimensions.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run the col2im
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T>
+ void run_col2im(const Window &window);
+
+ /** Common signature for all the specialised col2im functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window);
+
+ Col2ImFunctionPtr _func;
+ const ITensor *_input;
+ ITensor *_output;
+ Size2D _convolved_dims;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NECOL2IMKERNEL_H */
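The formula above amounts to a row-major reshape of each column back into a spatial block. A minimal scalar illustration for a single output channel follows; layout details of the full kernel, such as batching and the OFM dimension, are deliberately omitted.

#include <cstddef>

// Rearrange one column of convolved_w * convolved_h elements into a [convolved_w x convolved_h] block.
void col2im_single_channel(const float *column, float *image, size_t convolved_w, size_t convolved_h)
{
    for(size_t y = 0; y < convolved_h; ++y)
    {
        for(size_t x = 0; x < convolved_w; ++x)
        {
            image[y * convolved_w + x] = column[y * convolved_w + x]; // a0..a8 -> 3x3 in the example above
        }
    }
}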
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h"
+#include "src/core/NEON/kernels/NEColorConvertKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_COLORCONVERTKERNEL_H
+#define ARM_COMPUTE_COLORCONVERTKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class IMultiImage;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the color convert kernel */
+class NEColorConvertKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEColorConvertKernel";
+ }
+ /** Default constructor */
+ NEColorConvertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEColorConvertKernel(const NEColorConvertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEColorConvertKernel(NEColorConvertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default;
+ /** Default destructor */
+ ~NEColorConvertKernel() = default;
+
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
+ * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
+ * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+ * U8 (if the formats of @p input is RGB888)
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
+ * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
+ */
+ void configure(const IMultiImage *input, IImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+ * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
+ */
+ void configure(const IImage *input, IMultiImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
+ * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
+ */
+ void configure(const IMultiImage *input, IMultiImage *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win);
+ const void *_input;
+ void *_output;
+ ColorConvertFunction *_func;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_COLORCONVERTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
+#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa.
+ *
+ * @note This function can be applied to the 2D weights used by a Fully Connected layer if:
+ * - It follows a Convolution layer
+ * - The data layout used by the network does not match the one the model has been trained in.
+ *
+ * @note This function assumes the weights are already reshaped (transposed)
+ */
+class NEConvertFullyConnectedWeightsKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEConvertFullyConnectedWeightsKernel";
+ }
+ /** Default constructor */
+ NEConvertFullyConnectedWeightsKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvertFullyConnectedWeightsKernel(const NEConvertFullyConnectedWeightsKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvertFullyConnectedWeightsKernel &operator=(const NEConvertFullyConnectedWeightsKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEConvertFullyConnectedWeightsKernel(NEConvertFullyConnectedWeightsKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEConvertFullyConnectedWeightsKernel &operator=(NEConvertFullyConnectedWeightsKernel &&) = default;
+ /** Default destructor */
+ ~NEConvertFullyConnectedWeightsKernel() = default;
+ /** Set the input and output tensor.
+ *
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
+ * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
+ * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+ * @param[in] data_layout The data layout the weights have been trained in.
+ */
+ void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel
+ *
+ * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
+ * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
+ * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+ * @param[in] data_layout The data layout the weights have been trained in.
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run the permute
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T>
+ void run_convert_fc_weights(const Window &window);
+
+ const ITensor *_input;
+ ITensor *_output;
+ unsigned int _factor1; /* equals the number of elements per original input plane if @p data_layout == NCHW; the number of channels otherwise */
+ unsigned int _factor2; /* equals the number of elements per original input plane if @p data_layout == NHWC; the number of channels otherwise */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */
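The _factor1/_factor2 comments above fully determine the permutation: an index flattened in the trained layout is split into its in-plane position and its channel, then re-flattened in the execution layout. A sketch of that index mapping, derived from the comments rather than copied from the kernel:

#include <cstddef>

// Map a flattened weight-row index from the trained layout to the execution layout.
// factor1 = elements per original input plane, factor2 = number of channels when the
// weights were trained in NCHW; the two factors swap roles for NHWC-trained weights.
size_t convert_fc_weight_index(size_t idx, size_t factor1, size_t factor2)
{
    const size_t plane_pos = idx % factor1; // position inside the original spatial plane
    const size_t channel   = idx / factor1; // channel index in the trained layout
    return plane_pos * factor2 + channel;   // same element, flattened in the other layout
}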
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H
+#define ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** NEON kernel to convert asymmetric unsigned to asymmetric signed and vice-versa */
+class NEConvertQuantizedSignednessKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEConvertQuantizedSignednessKernel";
+ }
+ /** Default constructor */
+ NEConvertQuantizedSignednessKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEConvertQuantizedSignednessKernel(const NEConvertQuantizedSignednessKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEConvertQuantizedSignednessKernel &operator=(const NEConvertQuantizedSignednessKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEConvertQuantizedSignednessKernel(NEConvertQuantizedSignednessKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEConvertQuantizedSignednessKernel &operator=(NEConvertQuantizedSignednessKernel &&) = default;
+ /** Default destructor */
+ ~NEConvertQuantizedSignednessKernel() = default;
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data types supported: opposite of @p input.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEConvertQuantizedSignednessKernel
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor. Data types supported: opposite of @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H */
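A value-preserving QASYMM8 <-> QASYMM8_SIGNED conversion is usually just a shift of the 8-bit code by 128 (equivalently, flipping the top bit) together with moving the zero point by 128; the sketch below assumes that convention rather than quoting the kernel's arithmetic.

#include <cstdint>
#include <cstddef>

// Remap unsigned asymmetric codes (0..255) to signed asymmetric codes (-128..127).
void convert_qasymm8_to_signed(const uint8_t *src, int8_t *dst, size_t count)
{
    for(size_t i = 0; i < count; ++i)
    {
        dst[i] = static_cast<int8_t>(src[i] ^ 0x80); // flip the MSB; the tensor's zero point shifts by 128 accordingly
    }
}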
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
+#include "src/core/NEON/kernels/NEConvolutionKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H
+#define ARM_COMPUTE_NECONVOLUTIONKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+#include "src/core/NEON/INESimpleKernel.h"
+
+#include <array>
+#include <cstdint>
+#include <vector>
+
+namespace arm_compute
+{
+class ITensor;
+
+/****************************************************************************************\
+ * Square Convolution *
+\****************************************************************************************/
+
+/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
+ * The client can supply a convolution matrix \f$ C_{m,n} \f$.
+ * @f{eqnarray}{
+ * k_0 &=& \frac{m}{2} \\
+ * l_0 &=& \frac{n}{2} \\
+ * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
+ * @f}
+ *
+ * @note The above equation for this function is similar to the default OpenCV Filter2D function,
+ * which actually computes a correlation and not a convolution.
+ * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
+ */
+template <unsigned int matrix_size>
+class NEConvolutionKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEConvolutionKernel";
+ }
+ /** Default constructor */
+ NEConvolutionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEConvolutionKernel(const NEConvolutionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEConvolutionKernel &operator=(const NEConvolutionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEConvolutionKernel(NEConvolutionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEConvolutionKernel &operator=(NEConvolutionKernel &&) = default;
+ /** Default destructor */
+ ~NEConvolutionKernel() = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ template <typename OutputType>
+ void convolution(const Window &win);
+
+protected:
+ uint32_t _scale; /**< scale of the convolution */
+ std::array<int16_t, matrix_size *matrix_size> _convolution; /**< convolution matrix */
+};
+
+/** Interface for the kernel which applies a 3x3 convolution to a tensor.*/
+using NEConvolution3x3Kernel = NEConvolutionKernel<3>;
+/** Interface for the kernel which applies a 5x5 convolution to a tensor.*/
+using NEConvolution5x5Kernel = NEConvolutionKernel<5>;
+/** Interface for the kernel which applies a 7x7 convolution to a tensor.*/
+using NEConvolution7x7Kernel = NEConvolutionKernel<7>;
+/** Interface for the kernel which applies a 9x9 convolution to a tensor.*/
+using NEConvolution9x9Kernel = NEConvolutionKernel<9>;
+
+/****************************************************************************************\
+ * Separable Square Convolution *
+\****************************************************************************************/
+
+/** Kernel for the Horizontal pass of a Separable Convolution */
+template <unsigned int matrix_size>
+class NESeparableConvolutionHorKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESeparableConvolutionHorKernel";
+ }
+ /** Default constructor */
+ NESeparableConvolutionHorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NESeparableConvolutionHorKernel(const NESeparableConvolutionHorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NESeparableConvolutionHorKernel &operator=(const NESeparableConvolutionHorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESeparableConvolutionHorKernel(NESeparableConvolutionHorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESeparableConvolutionHorKernel &operator=(NESeparableConvolutionHorKernel &&) = default;
+ /** Default destructor */
+ ~NESeparableConvolutionHorKernel() = default;
+
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U16, S16, S32.
+ * @param[in] conv_row Convolution matrix to apply to the input tensor.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Apply the object's convolution to the given window of the input tensor.
+ *
+ * @param[in] window Window to apply the convolution on.
+ */
+ template <typename OutputType>
+ void convolve(const Window &window);
+
+ std::array<int16_t, matrix_size> _conv_row; /**< Convolution coefficients */
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel which applies a 5x1 horizontal convolution to a tensor.*/
+using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>;
+/** Interface for the kernel which applies a 7x1 horizontal convolution to a tensor.*/
+using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>;
+/** Interface for the kernel which applies a 9x1 horizontal convolution to a tensor.*/
+using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>;
+
+/** Kernel for the Vertical pass of a Separable Convolution */
+template <unsigned int matrix_size>
+class NESeparableConvolutionVertKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESeparableConvolutionVertKernel";
+ }
+ /** Default constructor */
+ NESeparableConvolutionVertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NESeparableConvolutionVertKernel(const NESeparableConvolutionVertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NESeparableConvolutionVertKernel &operator=(const NESeparableConvolutionVertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESeparableConvolutionVertKernel(NESeparableConvolutionVertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESeparableConvolutionVertKernel &operator=(NESeparableConvolutionVertKernel &&) = default;
+ /** Default destructor */
+ ~NESeparableConvolutionVertKernel() = default;
+
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U16, S16, S32.
+ * @param[out] output Destination tensor. Data types supported: U8, S16.
+ * @param[in] conv_col Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Apply the object's convolution to the given window of the input tensor.
+ * This function is used if the intermediate values have been stored as U16.
+ *
+ * @param[in] win Window to apply the convolution on.
+ */
+ template <typename OutputType>
+ void convolution_u16(const Window &win);
+ /** Apply the object's convolution to the given window of the input tensor.
+ * This function is used if the intermediate values have been stored as S16.
+ *
+ * @param[in] win Window to apply the convolution on.
+ */
+ template <typename OutputType>
+ void convolution_s16(const Window &win);
+ /** Apply the object's convolution to the given window of the input tensor.
+ * This function is used if the intermediate values have been stored as S32.
+ *
+ * @param[in] win Window to apply the convolution on.
+ */
+ template <typename OutputType>
+ void convolution_s32(const Window &win);
+
+ std::array<int16_t, matrix_size> _conv_col; /**< Convolution coefficients */
+ uint32_t _scale; /**< Convolution's scale */
+};
+
+/** Interface for the kernel which applies a 1x5 vertical convolution to a tensor.*/
+using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>;
+/** Interface for the kernel which applies a 1x7 vertical convolution to a tensor.*/
+using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>;
+/** Interface for the kernel which applies a 1x9 vertical convolution to a tensor.*/
+using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>;
+
+/****************************************************************************************\
+ * Rectangle Convolution *
+\****************************************************************************************/
+
+/** Kernel for running a convolution on a rectangle matrix.
+ *
+ * @note Supports combinations of 3, 5, 7 and 9.
+ */
+class NEConvolutionRectangleKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEConvolutionRectangleKernel";
+ }
+ /** Default constructor */
+ NEConvolutionRectangleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default;
+ /** Default destructor */
+ ~NEConvolutionRectangleKernel() = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] width Width of convolution matrix (Number of columns)
+ * @param[in] height Height of convolution matrix (Number of rows)
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ unsigned int get_index(uint32_t val);
+ /** Apply the object's convolution to the given window of the input tensor.
+ *
+ * @param[in] win Window to apply the convolution on.
+ */
+ template <typename OutputType, unsigned int rows, unsigned int cols>
+ void convolution(const Window &win);
+
+protected:
+ const ITensor *_input; /**< Input tensor */
+ ITensor *_output; /**< Output tensor */
+ uint32_t _scale; /**< Scale of the convolution */
+ std::vector<int16_t> _convolution; /**< Convolution matrix */
+ BorderSize _border_size; /**< Calculated border width */
+ uint32_t _func_idx; /**< Index used to specify convolution function to be used */
+ const static unsigned int _nr_supported_sizes
+ {
+ 4
+ }; /**< Number of supported permutations */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */
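The equations documented above translate directly into a scalar reference. The snippet below evaluates the correlation sum at one pixel and applies the scale; it is a sketch of the documented formula only, assumes the pixel is far enough from the border, and omits the saturation to U8/S16 that the kernel performs when storing the result.

#include <cstdint>
#include <cstddef>

// Evaluate sum = SUM_{k,l} input(x + k - k0, y + l - l0) * C[k][l], then divide by scale.
int32_t convolve_at(const uint8_t *input, size_t row_stride, int x, int y,
                    const int16_t *conv, unsigned int m, unsigned int n, uint32_t scale)
{
    const int k0  = static_cast<int>(m) / 2;
    const int l0  = static_cast<int>(n) / 2;
    int32_t   sum = 0;
    for(unsigned int k = 0; k < m; ++k)
    {
        for(unsigned int l = 0; l < n; ++l)
        {
            const int in_x = x + static_cast<int>(k) - k0;
            const int in_y = y + static_cast<int>(l) - l0;
            sum += static_cast<int32_t>(input[in_y * static_cast<int>(row_stride) + in_x]) * conv[k * n + l];
        }
    }
    return sum / static_cast<int32_t>(scale); // scale is never 0: configure() replaces 0 with the coefficient sum (or 1)
}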
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECOPYKERNEL_H
+#define ARM_COMPUTE_NECOPYKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a copy between two tensors */
+class NECopyKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NECopyKernel";
+ }
+ /** Default constructor */
+ NECopyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NECopyKernel(const NECopyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NECopyKernel &operator=(const NECopyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NECopyKernel(NECopyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NECopyKernel &operator=(NECopyKernel &&) = default;
+ /** Default destructor */
+ ~NECopyKernel() = default;
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] input Source tensor. Data types supported: All
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] padding (Optional) Padding to be applied to the input tensor
+ */
+ void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList());
+ /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel
+ *
+ * @param[in] input Source tensor. Data types supported: All
+ * @param[in] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] padding (Optional) Padding to be applied to the input tensor
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList());
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ PaddingList _padding;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NECOPYKERNEL_H */
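Kernels such as NECopyKernel are typically driven by configuring them on allocated tensors and handing them to the scheduler. The sketch below assumes that usual pattern rather than reproducing a specific test; only the configure() call is taken from the documentation above.

#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NECopyKernel.h"

void copy_tensor_example()
{
    using namespace arm_compute;

    Tensor src{}, dst{};
    const TensorInfo info(TensorShape(16U, 16U), 1, DataType::F32);
    src.allocator()->init(info);
    dst.allocator()->init(info);
    src.allocator()->allocate();
    dst.allocator()->allocate();

    NECopyKernel copy{};
    copy.configure(&src, &dst);                       // as documented above; padding defaults to none
    NEScheduler::get().schedule(&copy, Window::DimY); // split the work across rows
}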
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NECropKernel.h"
+#include "src/core/NEON/kernels/NECropKernel.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEON_CROP_KERNEL_H
+#define ARM_COMPUTE_NEON_CROP_KERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the kernel to perform tensor cropping */
+class NECropKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NECropKernel";
+ }
+ /** Default constructor */
+ NECropKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECropKernel(const NECropKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECropKernel &operator=(const NECropKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NECropKernel(NECropKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NECropKernel &operator=(NECropKernel &&) = default;
+ /** Default destructor */
+ ~NECropKernel() = default;
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Padding not supported.
+ *
+ * @param[in] input Source tensor. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
+ * @param[in] crop_boxes Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values.
+ * Data type supported: F32
+ * @param[in] box_ind One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input.
+ * Data type supported: F32
+ * @param[out] output Destination tensor. Data type supported: F32
+ * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ */
+ void configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NECropKernel
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Padding not supported.
+ *
+ * @param[in] input Source tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
+ * @param[in] crop_boxes Tensor info for tensor containing all possible boxes used to crop the image. Data type supported: F32
+ * @param[in] box_ind Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image
+ * in @p input. Data type supported: F32
+ * @param[in] output Destination tensor. Data type supported: F32
+ * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0);
+
+ /** Configure output tensor's shape as this can only be determined at runtime. */
+ void configure_output_shape();
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+ /** Function to use for in bounds crop for the particular tensor types passed to configure() */
+ using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t, bool, bool);
+
+private:
+ const ITensor *_input;
+ const ITensor *_crop_boxes;
+ const ITensor *_box_ind;
+ ITensor *_output;
+
+ Coordinates _start;
+ Coordinates _end;
+ uint32_t _crop_box_ind;
+ float _extrapolation_value;
+ /** The number of rows out of bounds at the start and end of output. */
+ std::array<uint32_t, 2> _rows_out_of_bounds;
+ /** The number of columns out of bounds at the start and end of output. */
+ std::array<uint32_t, 2> _cols_out_of_bounds;
+
+ NECropKernel::InBoundsCropFunction *_in_bounds_crop_function;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEON_CROP_KERNEL_H */
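To make the parameters above concrete, here is a heavily simplified single-channel sketch of cropping with an extrapolation value. The ordering of the four normalized box values and the nearest-neighbour sampling are assumptions made for illustration; the kernel's actual coordinate handling lives in its in-bounds crop functions.

#include <cmath>
#include <cstddef>
#include <vector>

// Crop a normalized box (y0, x0, y1, x1) out of a single-channel F32 image into an
// out_w x out_h block; source positions outside the image take extrapolation_value.
std::vector<float> crop_single_channel(const float *img, int img_w, int img_h,
                                       float y0, float x0, float y1, float x1,
                                       int out_w, int out_h, float extrapolation_value)
{
    std::vector<float> out(static_cast<size_t>(out_w) * out_h, extrapolation_value);
    for(int oy = 0; oy < out_h; ++oy)
    {
        for(int ox = 0; ox < out_w; ++ox)
        {
            const float fy = (out_h > 1) ? static_cast<float>(oy) / (out_h - 1) : 0.0f;
            const float fx = (out_w > 1) ? static_cast<float>(ox) / (out_w - 1) : 0.0f;
            const int   sy = static_cast<int>(std::lround((y0 + (y1 - y0) * fy) * (img_h - 1)));
            const int   sx = static_cast<int>(std::lround((x0 + (x1 - x0) * fx) * (img_w - 1)));
            if(sx >= 0 && sx < img_w && sy >= 0 && sy < img_h)
            {
                out[static_cast<size_t>(oy) * out_w + ox] = img[static_cast<size_t>(sy) * img_w + sx];
            }
        }
    }
    return out;
}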
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
+#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H
+#define ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class IDistribution1D;
+class ILut;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the cumulative distribution (cumulative summation) calculation kernel.
+ *
+ * This kernel calculates the cumulative sum of a given distribution (meaning that each output element
+ * is the sum of all its previous elements including itself) and creates a lookup table with the normalized
+ * pixel intensities which is used to improve the contrast of the image.
+ */
+class NECumulativeDistributionKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NECumulativeDistributionKernel";
+ }
+ /** Default constructor */
+ NECumulativeDistributionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default;
+ /** Default destructor */
+ ~NECumulativeDistributionKernel() = default;
+ /** Set the input and output distribution.
+ *
+ * @param[in] input Input image. Data type supported: U8
+ * @param[in] distribution Unnormalized 256-bin distribution of the input image.
+ * @param[out] cumulative_sum Cumulative distribution (Summed histogram). Should be same size as @p distribution.
+ * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements.
+ */
+ void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ bool is_parallelisable() const override;
+
+private:
+ const IImage *_input; /**< Input image. */
+ const IDistribution1D *_distribution; /**< Input histogram of the input image. */
+ IDistribution1D *_cumulative_sum; /**< The cumulative distribution. */
+ ILut *_output; /**< Output with the equalization lookup table. */
+private:
+ static const uint32_t _histogram_size = 256; /**< Default histogram size of 256. */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H */
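Written out in scalar form, the calculation the comment describes is the textbook histogram-equalization table; the normalization used below is the standard formula and is stated here as an assumption rather than a quotation of the kernel.

#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>

// Build the cumulative sum of a 256-bin histogram and map it to a U8 equalization LUT.
std::array<uint8_t, 256> equalization_lut(const std::array<uint32_t, 256> &histogram)
{
    std::array<uint32_t, 256> cdf{};
    uint32_t running = 0;
    for(size_t i = 0; i < histogram.size(); ++i)
    {
        running += histogram[i]; // each element is the sum of all previous elements including itself
        cdf[i] = running;
    }

    uint32_t cdf_min = 0;
    for(uint32_t v : cdf)
    {
        if(v != 0)
        {
            cdf_min = v; // first non-zero bin of the cumulative distribution
            break;
        }
    }
    const uint32_t total = cdf.back(); // number of pixels in the image

    std::array<uint8_t, 256> lut{};
    for(size_t i = 0; i < cdf.size(); ++i)
    {
        const float norm = (cdf[i] > cdf_min && total > cdf_min) ? static_cast<float>(cdf[i] - cdf_min) / (total - cdf_min) : 0.0f;
        lut[i] = static_cast<uint8_t>(std::lround(norm * 255.0f));
    }
    return lut;
}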
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H
+#define ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the depth concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class NEDepthConcatenateLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDepthConcatenateLayerKernel";
+ }
+ /** Default constructor */
+ NEDepthConcatenateLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEDepthConcatenateLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] depth_offset The offset on the Z axis.
+ * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @note The output tensor's two lowest dimensions can't be smaller than the input's.
+ * @note The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+ *
+ */
+ void configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] depth_offset The offset on the Z axis.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window);
+
+private:
+ DepthConcatFunction *_func;
+ unsigned int _depth_offset;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H */
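Conceptually the kernel copies the whole input volume into the output volume at the requested channel offset. A scalar sketch for the simple case where input and output share their spatial dimensions follows (the notes above allow the output plane to be larger, which this illustration ignores).

#include <cstddef>

// Copy an input volume of in_depth channels into an output volume starting at channel depth_offset.
void concat_depth(const float *in, float *out, size_t width, size_t height, size_t in_depth, size_t depth_offset)
{
    for(size_t z = 0; z < in_depth; ++z)
    {
        for(size_t y = 0; y < height; ++y)
        {
            for(size_t x = 0; x < width; ++x)
            {
                out[((depth_offset + z) * height + y) * width + x] = in[(z * height + y) * width + x];
            }
        }
    }
}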
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_DEPTHCONVERTKERNEL_H
+#define ARM_COMPUTE_DEPTHCONVERTKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Depth conversion kernel
+ * This kernel ignores the scale and zeroPoint of quantized tensors, i.e. QASYMM8 input is treated as uint8 values.
+ */
+class NEDepthConvertLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDepthConvertLayerKernel";
+ }
+ /** Default constructor*/
+ NEDepthConvertLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete;
+ /** Default move constructor */
+ NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete;
+ /** Default move assignment operator */
+ NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEDepthConvertLayerKernel() = default;
+ /** Set the input and output of the kernel
+ *
+ * Valid conversions Input -> Output :
+ *
+ * - QASYMM8_SIGNED -> S16, S32, F32, F16
+ * - QASYMM8 -> U16, S16, S32, F32, F16
+ * - U8 -> U16, S16, S32, F32, F16
+ * - U16 -> U8, U32
+ * - S16 -> QASYMM8_SIGNED, U8, S32
+ * - BFLOAT16 -> F32
+ * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
+ * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
+ * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8
+ *
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32.
+ * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32.
+ * @param[in] policy Conversion policy.
+ * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+ */
+ void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32.
+ * @param[in] policy Conversion policy
+ * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ ConvertPolicy _policy;
+ uint32_t _shift;
+};
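+/* Usage sketch (illustrative only): converting a U8 tensor to F32 with saturation and no shift.
+ * `src` and `dst` are assumed to be caller-allocated tensors of matching shape, and the usual
+ * NEON runtime scheduler is assumed to be available for dispatch.
+ *
+ * @code
+ * NEDepthConvertLayerKernel convert;
+ * if(NEDepthConvertLayerKernel::validate(src.info(), dst.info(), ConvertPolicy::SATURATE, 0))
+ * {
+ *     convert.configure(&src, &dst, ConvertPolicy::SATURATE, 0);
+ *     NEScheduler::get().schedule(&convert, Window::DimY);
+ * }
+ * @endcode
+ */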
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_DEPTHCONVERTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H
+#define ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the depth to space kernel */
+class NEDepthToSpaceLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDepthToSpaceLayerKernel";
+ }
+ /** Default constructor */
+ NEDepthToSpaceLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthToSpaceLayerKernel(const NEDepthToSpaceLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthToSpaceLayerKernel &operator=(const NEDepthToSpaceLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEDepthToSpaceLayerKernel(NEDepthToSpaceLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEDepthToSpaceLayerKernel &operator=(NEDepthToSpaceLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEDepthToSpaceLayerKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All
+ * @param[out] output Tensor output. Data types supported: same as @p input
+     * @param[in]  block_shape Block shape value.
+ */
+ void configure(const ITensor *input, ITensor *output, int32_t block_shape);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel.
+ *
+ * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All
+ * @param[in] output Tensor output info. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input; /**< Source tensor */
+ ITensor *_output; /**< Destination tensor */
+ int32_t _block_shape; /**< Block shape */
+ DataLayout _data_layout; /**< Data layout of the operation */
+};
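+/* Usage sketch (illustrative only): rearranging depth into 2x2 spatial blocks. `src` and `dst`
+ * are assumed to be caller-allocated 4D tensors whose shapes already satisfy the depth-to-space
+ * relationship for block_shape = 2.
+ *
+ * @code
+ * NEDepthToSpaceLayerKernel d2s;
+ * if(NEDepthToSpaceLayerKernel::validate(src.info(), dst.info(), 2))
+ * {
+ *     d2s.configure(&src, &dst, 2);
+ *     NEScheduler::get().schedule(&d2s, Window::DimY);
+ * }
+ * @endcode
+ */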
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
+#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
+
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/core/NEON/INEKernel.h"
+#include "support/Requires.h"
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include <arm_neon.h>
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the kernel to run a depthwise convolution native on a tensor. */
+class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDepthwiseConvolutionLayerNativeKernel";
+ }
+ /** Default constructor */
+ NEDepthwiseConvolutionLayerNativeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
+ /** Default Move Constructor. */
+ NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
+ /** Default move assignment operator */
+ NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
+ /** Default destructor */
+ ~NEDepthwiseConvolutionLayerNativeKernel() = default;
+ /** Initialize the function's source, destination and parameters.
+ *
+ * @note Supported data layouts: NHWC
+ *
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ *
+ */
+ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
+ const Size2D &dilation = Size2D(1U, 1U));
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel
+ *
+ * @note Supported data layouts: NHWC
+ *
+ * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
+ const Size2D &dilation = Size2D(1U, 1U));
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ template <typename T>
+ using FloatEnalber = typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, int>::type;
+
+ template <typename T, typename TW, FloatEnalber<T> = 0>
+ void run_depthwise(const Window &window, bool has_biases);
+
+ template <typename T>
+ using Quantized8bitEnalber = typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type;
+
+ template <typename T, typename TW, Quantized8bitEnalber<T> = 0>
+ void run_depthwise(const Window &window, bool has_biases);
+
+ /** Common signature for all the specialised depthwise convolution native functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases);
+
+ DepthwiseFunctionPtr _func;
+ const ITensor *_input;
+ const ITensor *_weights;
+ const ITensor *_biases;
+ ITensor *_output;
+ PadStrideInfo _conv_info;
+ unsigned int _depth_multiplier;
+ Size2D _dilation;
+ std::vector<int> _output_multiplier;
+ std::vector<int> _output_shift;
+ bool _has_biases;
+};
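+/* Usage sketch (illustrative only): a 3x3 depthwise convolution in NHWC with stride 1 and one
+ * pixel of padding. `src`, `weights`, `biases` and `dst` are assumed to be caller-allocated
+ * tensors shaped as described in configure().
+ *
+ * @code
+ * NEDepthwiseConvolutionLayerNativeKernel dwc;
+ * const PadStrideInfo conv_info(1, 1, 1, 1); // stride_x, stride_y, pad_x, pad_y
+ * if(NEDepthwiseConvolutionLayerNativeKernel::validate(src.info(), weights.info(), biases.info(), dst.info(), conv_info))
+ * {
+ *     dwc.configure(&src, &weights, &biases, &dst, conv_info);
+ *     NEScheduler::get().schedule(&dwc, Window::DimY);
+ * }
+ * @endcode
+ */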
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the dequantization layer kernel. */
+class NEDequantizationLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDequantizationLayerKernel";
+ }
+ /** Default constructor */
+ NEDequantizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDequantizationLayerKernel(const NEDequantizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default;
+ /** Default move assignment operator */
+ NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEDequantizationLayerKernel() = default;
+ /** Set input, output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] output Output tensor info. Data types supported: F16/F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+};
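+/* Usage sketch (illustrative only): dequantizing a QASYMM8 tensor into F32. `q_src` and `f_dst`
+ * are assumed to be caller-allocated tensors of the same shape, with `q_src` carrying valid
+ * quantization information.
+ *
+ * @code
+ * NEDequantizationLayerKernel dequant;
+ * if(NEDequantizationLayerKernel::validate(q_src.info(), f_dst.info()))
+ * {
+ *     dequant.configure(&q_src, &f_dst);
+ *     NEScheduler::get().schedule(&dequant, Window::DimY);
+ * }
+ * @endcode
+ */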
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEDERIVATIVEKERNEL_H
+#define ARM_COMPUTE_NEDERIVATIVEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run the derivative along the X/Y directions on a tensor.
+ *
+ */
+class NEDerivativeKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDerivativeKernel";
+ }
+ /** Default constructor */
+ NEDerivativeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDerivativeKernel(const NEDerivativeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEDerivativeKernel(NEDerivativeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default;
+ /** Default destructor */
+ ~NEDerivativeKernel() = default;
+ /** Initialise the kernel's sources, destination and border
+ *
+ * @note At least one of output_x or output_y must be set
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Function to perform derivative along the X direction on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void derivative_x(const Window &window);
+ /** Function to perform derivative along the Y direction on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void derivative_y(const Window &window);
+ /** Function to perform derivative along the X and Y direction on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void derivative_xy(const Window &window);
+ /** Common signature for all the specialised derivative functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window);
+ /** Derivative function to use for the particular tensor types passed to configure() */
+ DerivativeFunction _func;
+
+private:
+ const ITensor *_input; /**< Input tensor */
+    ITensor       *_output_x; /**< Output tensor - Derivative along the X direction */
+    ITensor       *_output_y; /**< Output tensor - Derivative along the Y direction */
+};
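+/* Usage sketch (illustrative only): computing both X and Y derivatives of a U8 image. `src`,
+ * `grad_x` and `grad_y` are assumed to be caller-allocated tensors (U8 input, S16 outputs);
+ * border filling is omitted here, and passing nullptr for one output computes only the other.
+ *
+ * @code
+ * NEDerivativeKernel derivative;
+ * derivative.configure(&src, &grad_x, &grad_y, true); // border is undefined
+ * NEScheduler::get().schedule(&derivative, Window::DimY);
+ * @endcode
+ */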
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEDERIVATIVEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
+#include "src/core/NEON/kernels/NEDilateKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEDILATEKERNEL_H
+#define ARM_COMPUTE_NEDILATEKERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform boolean image dilation */
+class NEDilateKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDilateKernel";
+ }
+ /** Default constructor */
+ NEDilateKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDilateKernel(const NEDilateKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDilateKernel &operator=(const NEDilateKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEDilateKernel(NEDilateKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEDilateKernel &operator=(NEDilateKernel &&) = default;
+ /** Default destructor */
+ ~NEDilateKernel() = default;
+ /** Set the source, destination and border mode of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8
+ * @param[out] output Destination tensor. Data type supported: U8
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+};
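+/* Usage sketch (illustrative only): applying a 3x3 dilation to a U8 image. `src` and `dst` are
+ * assumed to be caller-allocated U8 tensors of the same shape; border filling is omitted.
+ *
+ * @code
+ * NEDilateKernel dilate;
+ * dilate.configure(&src, &dst, false); // border mode is replicate or constant
+ * NEScheduler::get().schedule(&dilate, Window::DimY);
+ * @endcode
+ */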
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEDILATEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
#include "src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h"
#include "src/core/NEON/wrapper/wrapper.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H
+#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON interface for Direct Convolution Layer kernel */
+class NEDirectConvolutionLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDirectConvolutionLayerKernel";
+ }
+ /** Default constructor */
+ NEDirectConvolutionLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEDirectConvolutionLayerKernel() = default;
+ /** Set the input, weights, and output tensors.
+ *
+ * @note: DirectConvolution only works in the following configurations:
+ * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
+ * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3
+ *
+ * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * The 3rd dimension must be the same as the input's volume 3rd dimension.
+     *                         Data type supported: Same as @p input.
+ * @param[out] output Output tensor.
+     *                         The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel
+ *
+ * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * The 3rd dimension must be the same as the input's volume 3rd dimension.
+     *                         Data type supported: Same as @p input.
+ * @param[in] output Output tensor.
+     *                         The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ /* Template function for optimized convolution NHWC */
+ template <typename T>
+ void convolve_nhwc_optimized(const Window &window);
+
+ /* Template function for convolution NHWC */
+ template <typename T>
+ void convolve_nhwc(const Window &window);
+
+ const ITensor *_input;
+ const ITensor *_weights;
+ ITensor *_output;
+ PadStrideInfo _conv_info;
+ BorderSize _border_size;
+ unsigned int _kernel_size;
+ unsigned int _num_weight_elems_read_per_row;
+ unsigned int _num_elems_read_per_iteration;
+ unsigned int _num_elems_written_per_iteration;
+};
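+/* Usage sketch (illustrative only): a 3x3 direct convolution with stride 1 on F32 tensors.
+ * `src`, `weights` and `dst` are assumed to be caller-allocated; bias accumulation is handled
+ * separately by the output stage kernel declared in the next header.
+ *
+ * @code
+ * NEDirectConvolutionLayerKernel conv;
+ * const PadStrideInfo conv_info(1, 1, 1, 1);
+ * if(NEDirectConvolutionLayerKernel::validate(src.info(), weights.info(), dst.info(), conv_info))
+ * {
+ *     conv.configure(&src, &weights, &dst, conv_info);
+ *     NEScheduler::get().schedule(&conv, Window::DimY);
+ * }
+ * @endcode
+ */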
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H
+#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
+ *
+ * @note We assume bias to be shared
+ * @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part
+ * of the @ref DirectConvolutionLayerOutputStageKernelInfo.
+ */
+class NEDirectConvolutionLayerOutputStageKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEDirectConvolutionLayerOutputStageKernel";
+ }
+ /** Default constructor */
+ NEDirectConvolutionLayerOutputStageKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default;
+ /** Default destructor */
+ ~NEDirectConvolutionLayerOutputStageKernel() = default;
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
+ * Data type supported: F16/F32/S32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
+ * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr.
+ * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32
+ * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata
+ */
+ void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr,
+ const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel
+ *
+ * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place.
+ * Data type supported: F16/F32/S32
+ * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input
+ * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
+ * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr.
+ * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32
+ * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr,
+ const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo());
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
+ int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias);
+
+private:
+ OutputStageKernel *_func;
+ ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
+ int _result_fixedpoint_multiplier;
+ int _result_shift;
+ int _result_offset_after_shift;
+};
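+/* Usage sketch (illustrative only): adding a shared F32 bias in-place to the accumulator produced
+ * by the direct convolution kernel. `acc` and `bias` are assumed to be caller-allocated tensors;
+ * quantized (S32) accumulators would additionally need an output tensor and a descriptor carrying
+ * the requantization parameters.
+ *
+ * @code
+ * NEDirectConvolutionLayerOutputStageKernel output_stage;
+ * if(NEDirectConvolutionLayerOutputStageKernel::validate(acc.info(), bias.info()))
+ * {
+ *     output_stage.configure(&acc, &bias);
+ *     NEScheduler::get().schedule(&output_stage, Window::DimY);
+ * }
+ * @endcode
+ */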
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
+#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
+#define ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for an element-wise operation kernel
+ *
+ * Element-wise operation is computed by:
+ * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f]
+ *
+ */
+class NEElementwiseOperationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEElementwiseOperationKernel";
+ }
+ /** Default constructor */
+ NEElementwiseOperationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default;
+ /** Default destructor */
+ ~NEElementwiseOperationKernel() = default;
+
+ /** Common signature for all the specialised arithmetic functions
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor info. Data types supported: Dependent on subclass.
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+protected:
+ /** Validate the argument passed to the kernel
+ *
+ * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor. Data types supported: Dependent on subclass.
+ */
+ static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+
+    /** Common configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff)
+ *
+ */
+ void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+
+ /** Function to use for the particular tensor types passed to configure() */
+ std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)> _function;
+
+ const ITensor *_input1;
+ const ITensor *_input2;
+ ITensor *_output;
+};
+
+class NEArithmeticOperationKernel : public NEElementwiseOperationKernel
+{
+public:
+ /** Default constructor */
+ NEArithmeticOperationKernel() = default;
+
+ /** Configure kernel
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ */
+ void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a Status
+ */
+ static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+protected:
+ // Inherited methods overridden:
+ static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+};
+
+class NEDivisionOperationKernel : public NEArithmeticOperationKernel
+{
+public:
+ /** Default constructor */
+ NEDivisionOperationKernel() = default;
+
+ /** Configure kernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ */
+ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+protected:
+ // Inherited methods overridden:
+ static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+};
+
+class NEPowerOperationKernel : public NEArithmeticOperationKernel
+{
+public:
+ /** Default constructor */
+ NEPowerOperationKernel() = default;
+
+ /** Configure kernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+ */
+ void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+protected:
+ // Inherited methods overridden:
+ static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+};
+
+class NEComparisonOperationKernel : public NEElementwiseOperationKernel
+{
+public:
+ /** Default constructor */
+ NEComparisonOperationKernel() = default;
+
+ /** Configure kernel
+ *
+ * @param[in] op Comparison operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor info. Data types supported: U8.
+ */
+ void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+ *
+ * @param[in] op Comparison operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: U8.
+ *
+ * @return a Status
+ */
+ static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+protected:
+ // Inherited methods overridden:
+ static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+};
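+/* Usage sketch (illustrative only): an element-wise maximum configured on tensor metadata and
+ * executed through an ITensorPack, following the run_op() interface declared above. `a`, `b`
+ * and `dst` are assumed to be caller-allocated tensors of matching type, the ACL_SRC_0/ACL_SRC_1/
+ * ACL_DST pack slots follow the library's operator conventions, and the direct run_op() call
+ * executes single-threaded over the configured window.
+ *
+ * @code
+ * NEArithmeticOperationKernel max_kernel;
+ * if(NEArithmeticOperationKernel::validate(ArithmeticOperation::MAX, a.info(), b.info(), dst.info()))
+ * {
+ *     max_kernel.configure(ArithmeticOperation::MAX, a.info(), b.info(), dst.info());
+ *     ITensorPack pack;
+ *     pack.add_tensor(TensorType::ACL_SRC_0, &a);
+ *     pack.add_tensor(TensorType::ACL_SRC_1, &b);
+ *     pack.add_tensor(TensorType::ACL_DST, &dst);
+ *     max_kernel.run_op(pack, max_kernel.window(), ThreadInfo{});
+ * }
+ * @endcode
+ */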
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h"
+#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
+#define ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for an element-wise unary operation kernel
+ *
+ * Element-wise operation is computed by:
+ * @f[ output(x) = OP(input(x))@f]
+ *
+ */
+class NEElementwiseUnaryKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEElementwiseUnaryKernel";
+ }
+ /** Default constructor */
+ NEElementwiseUnaryKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseUnaryKernel(const NEElementwiseUnaryKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseUnaryKernel &operator=(const NEElementwiseUnaryKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEElementwiseUnaryKernel(NEElementwiseUnaryKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEElementwiseUnaryKernel &operator=(NEElementwiseUnaryKernel &&) = default;
+ /** Default destructor */
+ ~NEElementwiseUnaryKernel() = default;
+
+ /** Function to configure the @ref NEElementwiseUnaryKernel
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ */
+ void configure(ElementWiseUnary op, const ITensor *input, ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a Status
+ */
+ static Status validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised arithmetic functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window);
+
+ /** Template function to run elementwise unary operation
+ *
+ * @tparam ScalarType Scalar datatype
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename ScalarType>
+ void elementwise_op(const Window &window);
+
+ ElementwiseUnaryPtr _func;
+ const ITensor *_input;
+ ITensor *_output;
+ ElementWiseUnary _op;
+};
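+/* Usage sketch (illustrative only): computing the element-wise exponential of an F32 tensor.
+ * `src` and `dst` are assumed to be caller-allocated tensors of the same shape and type.
+ *
+ * @code
+ * NEElementwiseUnaryKernel exp_kernel;
+ * if(NEElementwiseUnaryKernel::validate(ElementWiseUnary::EXP, src.info(), dst.info()))
+ * {
+ *     exp_kernel.configure(ElementWiseUnary::EXP, &src, &dst);
+ *     NEScheduler::get().schedule(&exp_kernel, Window::DimY);
+ * }
+ * @endcode
+ */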
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEErodeKernel.h"
+#include "src/core/NEON/kernels/NEErodeKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEERODEKERNEL_H
+#define ARM_COMPUTE_NEERODEKERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform boolean image erosion */
+class NEErodeKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEErodeKernel";
+ }
+ /** Default constructor */
+ NEErodeKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEErodeKernel(const NEErodeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEErodeKernel &operator=(const NEErodeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEErodeKernel(NEErodeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEErodeKernel &operator=(NEErodeKernel &&) = default;
+ /** Default destructor */
+ ~NEErodeKernel() = default;
+ /** Set the source, destination and border mode of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8
+ * @param[out] output Destination tensor. Data type supported: U8
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+};
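+/* Usage sketch (illustrative only): applying a 3x3 erosion to a U8 image. `src` and `dst` are
+ * assumed to be caller-allocated U8 tensors of the same shape; border filling is omitted.
+ *
+ * @code
+ * NEErodeKernel erode;
+ * erode.configure(&src, &dst, false); // border mode is replicate or constant
+ * NEScheduler::get().schedule(&erode, Window::DimY);
+ * @endcode
+ */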
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEERODEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H
+#define ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the digit reverse operation kernel. */
+class NEFFTDigitReverseKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEFFTDigitReverseKernel";
+ }
+ /** Constructor */
+ NEFFTDigitReverseKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFTDigitReverseKernel(const NEFFTDigitReverseKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFTDigitReverseKernel &operator=(const NEFFTDigitReverseKernel &) = delete;
+ /** Default Move Constructor. */
+ NEFFTDigitReverseKernel(NEFFTDigitReverseKernel &&) = default;
+ /** Default move assignment operator */
+ NEFFTDigitReverseKernel &operator=(NEFFTDigitReverseKernel &&) = default;
+ /** Default destructor */
+ ~NEFFTDigitReverseKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
+ * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor).
+ * @param[in] idx Digit reverse index tensor. Data type supported: U32
+ * @param[in] config Kernel configuration.
+ */
+ void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEFFTDigitReverseKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
+ * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor).
+ * @param[in] idx Digit reverse index tensor info. Data type supported: U32
+ * @param[in] config Kernel configuration
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ using NEFFTDigitReverseKernelFunctionPtr = void (NEFFTDigitReverseKernel::*)(const Window &window);
+
+ template <bool is_input_complex, bool is_conj>
+ void digit_reverse_kernel_axis_0(const Window &window);
+
+ template <bool is_input_complex, bool is_conj>
+ void digit_reverse_kernel_axis_1(const Window &window);
+
+ NEFFTDigitReverseKernelFunctionPtr _func;
+ const ITensor *_input;
+ ITensor *_output;
+ const ITensor *_idx;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H */
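// Illustrative sketch (not part of the patch): the kernel reorders elements through a
// precomputed U32 index tensor (the idx argument of configure()). For the pure radix-2
// case those indices are the classic bit-reversal permutation, which this helper computes
// for a power-of-two length N; mixed-radix FFTs use a more general digit reversal.
#include <cstdint>
#include <vector>

static std::vector<uint32_t> bit_reverse_indices(uint32_t N)
{
    // Assumes N is a power of two.
    uint32_t bits = 0;
    while((1u << bits) < N)
    {
        ++bits;
    }
    std::vector<uint32_t> idx(N);
    for(uint32_t i = 0; i < N; ++i)
    {
        uint32_t r = 0;
        for(uint32_t b = 0; b < bits; ++b)
        {
            r |= ((i >> b) & 1u) << (bits - 1 - b);
        }
        idx[i] = r;
    }
    return idx;
}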
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H
+#define ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/NEON/INEKernel.h"
+
+#include <arm_neon.h>
+#include <set>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the FFT kernel. */
+class NEFFTRadixStageKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEFFTRadixStageKernel";
+ }
+ /** Constructor */
+ NEFFTRadixStageKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFTRadixStageKernel(const NEFFTRadixStageKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFTRadixStageKernel &operator=(const NEFFTRadixStageKernel &) = delete;
+ /** Default Move Constructor. */
+ NEFFTRadixStageKernel(NEFFTRadixStageKernel &&) = default;
+ /** Default move assignment operator */
+ NEFFTRadixStageKernel &operator=(NEFFTRadixStageKernel &&) = default;
+ /** Default destructor */
+ ~NEFFTRadixStageKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @note If the output tensor is nullptr, the FFT will be performed in-place
+ *
+ * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
+ * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: same as @p input.
+ * @param[in] config FFT descriptor metadata.
+ */
+ void configure(ITensor *input, ITensor *output, const FFTRadixStageKernelInfo &config);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEFFTRadixStageKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
+ * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: same as @p input.
+ * @param[in] config FFT descriptor metadata.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config);
+    /** Returns the radix values supported by the FFT kernel
+     *
+     * @return A set of supported radix values
+     */
+ static std::set<unsigned int> supported_radix();
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ ITensor *_input;
+ ITensor *_output;
+ bool _run_in_place;
+ unsigned int _Nx;
+ unsigned int _axis;
+ unsigned int _radix;
+
+ void set_radix_stage_axis0(const FFTRadixStageKernelInfo &config);
+ void set_radix_stage_axis1(const FFTRadixStageKernelInfo &config);
+
+ using FFTFunctionPointerAxis0 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int)>;
+ using FFTFunctionPointerAxis1 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int, unsigned int)>;
+
+ FFTFunctionPointerAxis0 _func_0;
+ FFTFunctionPointerAxis1 _func_1;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H */
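// Illustrative sketch (not part of the patch): an FFT of length N is executed as a chain of
// radix stages, so N must factor into the radix values reported by supported_radix(). The
// greedy factorisation below is only one way a caller might plan those stages; the radix set
// passed in is an assumption, query supported_radix() for the values the kernel really handles.
#include <set>
#include <vector>

static std::vector<unsigned int> plan_radix_stages(unsigned int N, const std::set<unsigned int> &radix)
{
    // Assumes N >= 1 and every supported radix value is greater than 1.
    std::vector<unsigned int> stages;
    for(auto r = radix.rbegin(); r != radix.rend();)
    {
        if(N % *r == 0)
        {
            stages.push_back(*r);
            N /= *r;
            r = radix.rbegin(); // start again from the largest radix
        }
        else
        {
            ++r;
        }
    }
    // An empty result for N != 1 means the length cannot be built from the given radix set.
    return (N == 1) ? stages : std::vector<unsigned int>{};
}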
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEFFTSCALEKERNEL_H
+#define ARM_COMPUTE_NEFFTSCALEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the inverse fft scale kernel. */
+class NEFFTScaleKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEFFTScaleKernel";
+ }
+ /** Constructor */
+ NEFFTScaleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFTScaleKernel(const NEFFTScaleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFFTScaleKernel &operator=(const NEFFTScaleKernel &) = delete;
+ /** Default Move Constructor. */
+ NEFFTScaleKernel(NEFFTScaleKernel &&) = default;
+ /** Default move assignment operator */
+ NEFFTScaleKernel &operator=(NEFFTScaleKernel &&) = default;
+ /** Default destructor */
+ ~NEFFTScaleKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
+ * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
+ * @param[in] config Kernel configuration
+ */
+ void configure(ITensor *input, ITensor *output, const FFTScaleKernelInfo &config);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEFFTScaleKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
+ * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
+ * @param[in] config Kernel configuration
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ ITensor *_input;
+ ITensor *_output;
+ float _scale;
+ bool _run_in_place;
+ bool _is_conj;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEFFTSCALEKERNEL_H */
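// Illustrative sketch (not part of the patch): the inverse-FFT scale stage simply multiplies
// every complex element by a scale factor (typically 1/N), optionally conjugating first.
// Plain std::complex stands in for the 2-channel tensor layout the kernel actually consumes.
#include <complex>
#include <vector>

static void fft_scale_reference(std::vector<std::complex<float>> &data, float scale, bool conjugate)
{
    for(auto &v : data)
    {
        if(conjugate)
        {
            v = std::conj(v);
        }
        v *= scale;
    }
}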
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h"
+#include "src/core/NEON/kernels/NEFastCornersKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEFASTCORNERSKERNEL_H
+#define ARM_COMPUTE_NEFASTCORNERSKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** NEON kernel to perform fast corners */
+class NEFastCornersKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEFastCornersKernel";
+ }
+ /** Constructor */
+ NEFastCornersKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFastCornersKernel(const NEFastCornersKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEFastCornersKernel(NEFastCornersKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default;
+ /** Default destructor */
+ ~NEFastCornersKernel() = default;
+ /** Initialise the kernel.
+ *
+ * @param[in] input Source image. Data type supported: U8.
+ * @param[out] output Output image. Data type supported: U8.
+ * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+     * @param[in]  non_max_suppression True if non-maxima suppression is applied, false otherwise.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ const IImage *_input; /**< source image */
+    IImage       *_output;              /**< intermediate results */
+ uint8_t _threshold; /**< threshold on difference between intensity */
+    bool          _non_max_suppression; /**< true if non-maxima suppression is applied in the next stage */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEFASTCORNERSKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h"
+#include "src/core/NEON/kernels/NEFillArrayKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEFILLARRAYKERNEL_H
+#define ARM_COMPUTE_NEFILLARRAYKERNEL_H
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. */
+class NEFillArrayKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEFillArrayKernel";
+ }
+    /** Default constructor */
+ NEFillArrayKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFillArrayKernel(const NEFillArrayKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEFillArrayKernel(NEFillArrayKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default;
+    /** Default destructor */
+ ~NEFillArrayKernel() = default;
+
+ /** Initialise the kernel.
+ *
+ * @param[in] input Source image. Data type supported: U8.
+ * @param[in] threshold Texels greater than the threshold will be added to the array.
+ * @param[out] output Arrays of keypoints to store the results.
+ */
+ void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ bool is_parallelisable() const override;
+
+private:
+ const IImage *_input;
+ IKeyPointArray *_output;
+ uint8_t _threshold;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEFILLARRAYKERNEL_H*/
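// Illustrative sketch (not part of the patch): a scalar version of what the kernel does,
// assuming it appends one keypoint per texel whose intensity is greater than or equal to
// the threshold. A plain struct stands in for arm_compute::KeyPoint here.
#include <cstdint>
#include <vector>

struct SimpleKeyPoint
{
    int   x;
    int   y;
    float strength;
};

static std::vector<SimpleKeyPoint> fill_array_reference(const std::vector<uint8_t> &img, int width, int height, uint8_t threshold)
{
    std::vector<SimpleKeyPoint> points;
    for(int y = 0; y < height; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            const uint8_t v = img[y * width + x];
            if(v >= threshold)
            {
                points.push_back({ x, y, static_cast<float>(v) });
            }
        }
    }
    return points;
}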
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEFILLBORDERKERNEL_H
+#define ARM_COMPUTE_NEFILLBORDERKERNEL_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the kernel to fill borders */
+class NEFillBorderKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEFillBorderKernel";
+ }
+ /** Default Constructor */
+ NEFillBorderKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFillBorderKernel(const NEFillBorderKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEFillBorderKernel(NEFillBorderKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default;
+ /** Default destructor */
+ ~NEFillBorderKernel() = default;
+
+ /** Initialise the function.
+ *
+ * @note This kernel fills the borders within the XY-planes.
+ *
+ * @param[in,out] tensor Tensor to process. Data types supported: All.
+ * @param[in] border_size Size of the border to fill in elements.
+ * @param[in] border_mode Border mode to use for the convolution.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ *
+ */
+ void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ void fill_replicate_single_channel(const Window &window);
+ void fill_constant_value_single_channel(const Window &window);
+
+ ITensor *_tensor;
+ BorderSize _border_size;
+ BorderMode _mode;
+ PixelValue _constant_border_value;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEFILLBORDERKERNEL_H */
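// Illustrative sketch (not part of the patch): what filling a border of `border` pixels looks
// like for the REPLICATE and CONSTANT modes on a plain 2D single-channel buffer. The real
// kernel writes into the padded region around a tensor rather than inside the image itself,
// and the image is assumed to be larger than twice the border in each dimension.
#include <algorithm>
#include <cstdint>
#include <vector>

static void fill_border_reference(std::vector<uint8_t> &img, int width, int height, int border, bool replicate, uint8_t constant_value)
{
    for(int y = 0; y < height; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            const bool in_border = (x < border) || (y < border) || (x >= width - border) || (y >= height - border);
            if(!in_border)
            {
                continue;
            }
            if(replicate)
            {
                // Clamp to the nearest pixel of the interior region.
                const int cx       = std::min(std::max(x, border), width - border - 1);
                const int cy       = std::min(std::max(y, border), height - border - 1);
                img[y * width + x] = img[cy * width + cx];
            }
            else
            {
                img[y * width + x] = constant_value;
            }
        }
    }
}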
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H
+#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the flatten layer kernel. */
+class NEFlattenLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEFlattenLayerKernel";
+ }
+ /** Default constructor */
+ NEFlattenLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEFlattenLayerKernel() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input First input tensor to flatten with at least 3 dimensions.
+ * The dimensions above the third will be interpreted as batches. Data types supported: All
+ * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
+ * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel
+ *
+ * @param[in] input First input tensor to flatten with at least 3 dimensions.
+ * The dimensions above the third will be interpreted as batches. Data types supported: All
+ * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
+ * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */
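// Illustrative sketch (not part of the patch): the index arithmetic behind the documented
// output shape [w * h * d, input_batches], assuming x is the fastest-moving input dimension.
// Each (x, y, z, batch) element of the input lands at row (x + y * w + z * w * h) of the
// column addressed by `batch` in the flattened output.
#include <cstddef>

static std::size_t flatten_index(std::size_t x, std::size_t y, std::size_t z, std::size_t batch,
                                 std::size_t w, std::size_t h, std::size_t d)
{
    const std::size_t flat_len = w * h * d;             // rows of the flattened output
    const std::size_t row      = x + y * w + z * w * h; // position within one batch
    return batch * flat_len + row;                      // linear offset, batch-major
}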
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFloorKernel.h"
+#include "src/core/NEON/kernels/NEFloorKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Validate.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEFLOORKERNEL_H
+#define ARM_COMPUTE_NEFLOORKERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a floor operation */
+class NEFloorKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEFloorKernel";
+ }
+ /** Constructor */
+ NEFloorKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFloorKernel(const NEFloorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFloorKernel &operator=(const NEFloorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEFloorKernel(NEFloorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEFloorKernel &operator=(NEFloorKernel &&) = default;
+ /** Default destructor */
+ ~NEFloorKernel() = default;
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[out] output Destination tensor. Same as @p input
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEFloorKernel
+ *
+ * @param[in] input Source tensor info. Data type supported: F16/F32.
+ * @param[in] output Destination tensor info. Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEFLOORKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
+#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H
+#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** NEON kernel to fuse the batch normalization node to a preceding convolution node */
+class NEFuseBatchNormalizationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEFuseBatchNormalizationKernel";
+ }
+ /** Default constructor */
+ NEFuseBatchNormalizationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFuseBatchNormalizationKernel(const NEFuseBatchNormalizationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFuseBatchNormalizationKernel &operator=(const NEFuseBatchNormalizationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEFuseBatchNormalizationKernel(NEFuseBatchNormalizationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEFuseBatchNormalizationKernel &operator=(NEFuseBatchNormalizationKernel &&) = default;
+ /** Default destructor */
+ ~NEFuseBatchNormalizationKernel() = default;
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
+ * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
+ * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
+ * @param[out] fused_weights (Optional) Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
+ * @param[out] fused_bias (Optional) Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+ * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+ * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_beta is set to 0.0
+ * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_gamma is set to 1.0
+ * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+ * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
+ */
+ void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias,
+ const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr,
+ float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalizationKernel
+ *
+ * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
+ * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights
+ * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights
+ * @param[in] fused_weights (Optional) Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights
+ * @param[in] fused_bias (Optional) Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+ * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+ * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_beta is set to 0.0
+ * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_gamma is set to 1.0
+ * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+ * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
+ const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
+ const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
+ float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input_weights;
+ const ITensor *_input_bias;
+ const ITensor *_bn_mean;
+ const ITensor *_bn_var;
+ const ITensor *_bn_gamma;
+ const ITensor *_bn_beta;
+ ITensor *_fused_weights;
+ ITensor *_fused_bias;
+ float _epsilon;
+ bool _run_in_place_weights;
+ bool _run_in_place_bias;
+
+ using FuseBatchNormFunction = void(const ITensor *input_weights, const ITensor *input_bias, ITensor *fused_weights, ITensor *fused_bias,
+ const ITensor *bn_mean, const ITensor *bn_var, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, const Window &window);
+
+ FuseBatchNormFunction *_func;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H */
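// Illustrative sketch (not part of the patch): the per-channel arithmetic commonly used to
// fold batch normalization into the preceding convolution, matching the defaults documented
// above (beta = 0 when absent, gamma = 1 when absent, epsilon = 0.001f):
//   fused_w = w * gamma / sqrt(var + epsilon)
//   fused_b = (b - mean) * gamma / sqrt(var + epsilon) + beta
#include <cmath>

static void fuse_bn_channel(float gamma, float beta, float mean, float var, float epsilon,
                            float &weight, float &bias)
{
    const float scale = gamma / std::sqrt(var + epsilon);
    weight            = weight * scale;
    bias              = (bias - mean) * scale + beta;
}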
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMASSEMBLYBASE_H
+#define ARM_COMPUTE_NEGEMMASSEMBLYBASE_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Base class for GEMM NEON kernels implemented in Assembly. */
+class NEGEMMAssemblyBaseKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMAssemblyBaseKernel";
+ }
+ /** Constructor */
+ NEGEMMAssemblyBaseKernel()
+ : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _is_transposed_0(false), _is_transposed_1(false)
+ {
+ }
+
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMAssemblyBaseKernel(const NEGEMMAssemblyBaseKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMAssemblyBaseKernel &operator=(const NEGEMMAssemblyBaseKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMAssemblyBaseKernel(NEGEMMAssemblyBaseKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMAssemblyBaseKernel &operator=(NEGEMMAssemblyBaseKernel &&) = default;
+
+ virtual ~NEGEMMAssemblyBaseKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * The computed function is C = a * AxB + b * C.
+ *
+ * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32
+ * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0
+ * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0.
+ * @param[out] workspace Space for intermediate results.
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the accumulation.
+     * @param[in]     is_transposed_0 (Optional) True if @p input0 is transposed else false. (Defaults to false)
+     * @param[in]     is_transposed_1 (Optional) True if @p input1 is transposed else false. (Defaults to false)
+ */
+ void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool is_transposed_0 = false, bool is_transposed_1 = false)
+ {
+ internal_configure(input0, input1, output, workspace, alpha, beta, is_transposed_0, is_transposed_1);
+ }
+
+protected:
+ virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool _is_transposed_0, bool _is_transposed_1) = 0;
+
+ const ITensor *_input0;
+ const ITensor *_input1;
+ ITensor *_output;
+ ITensor *_workspace;
+ float _alpha;
+ float _beta;
+ bool _is_transposed_0;
+ bool _is_transposed_1;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGEMMASSEMBLYBASE_H*/
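// Illustrative sketch (not part of the patch): a naive reference of the documented
// computation C = alpha * A x B + beta * C for row-major float matrices, so the roles of the
// alpha and beta arguments of configure() are easy to see. The assembly kernels compute the
// same result with blocked, vectorised code.
static void gemm_reference(const float *a, const float *b, float *c,
                           int M, int N, int K, float alpha, float beta)
{
    for(int i = 0; i < M; ++i)
    {
        for(int j = 0; j < N; ++j)
        {
            float acc = 0.f;
            for(int k = 0; k < K; ++k)
            {
                acc += a[i * K + k] * b[k * N + j];
            }
            c[i * N + j] = alpha * acc + beta * c[i * N + j];
        }
    }
}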
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H
+#define ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to interleave the elements of a matrix
+ *
+ * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccccccccccc}
+ * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
+ */
+class NEGEMMInterleave4x4Kernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMInterleave4x4Kernel";
+ }
+ /** Constructor */
+ NEGEMMInterleave4x4Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMInterleave4x4Kernel(const NEGEMMInterleave4x4Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMInterleave4x4Kernel &operator=(const NEGEMMInterleave4x4Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMInterleave4x4Kernel(NEGEMMInterleave4x4Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMInterleave4x4Kernel &operator=(NEGEMMInterleave4x4Kernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMInterleave4x4Kernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMInterleave4x4Kernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run gemm interleave 4x4
+ *
+ * @tparam ScalarType Scalar datatype
+ *
+ * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
+ * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename ScalarType>
+ void gemm_interleave4x4(const ITensor *input, ITensor *output, const Window &window);
+
+ /** Common signature for all the specialised gemm interleave 4x4 functions
+ *
+ * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t
+ * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ using GEMMInterleaveFunctionFuncPtr = void (NEGEMMInterleave4x4Kernel::*)(const ITensor *input, ITensor *output, const Window &window);
+
+ GEMMInterleaveFunctionFuncPtr _func;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/
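// Illustrative sketch (not part of the patch): the 4x4 interleave pattern documented above,
// written as a plain loop. Within each 4x4 block the four values of a column are emitted
// consecutively, so every strip of four input rows becomes one contiguous run of 4 * width
// values. Width and height are assumed to be multiples of 4; the real kernel also handles tails.
#include <vector>

template <typename T>
static std::vector<T> interleave4x4_reference(const std::vector<T> &in, int width, int height)
{
    std::vector<T> out(in.size());
    int            pos = 0;
    for(int y = 0; y < height; y += 4)    // one strip per 4 input rows
    {
        for(int x = 0; x < width; x += 4) // walk the 4x4 blocks left to right
        {
            for(int j = 0; j < 4; ++j)    // column within the block
            {
                for(int i = 0; i < 4; ++i) // row within the block
                {
                    out[pos++] = in[(y + i) * width + x + j];
                }
            }
        }
    }
    return out;
}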
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H
+#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to multiply matrices
+ *
+ * @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel
+ * This kernel performs the following computation:
+ *
+ * -# Convert a values from int8 to int32
+ * -# Convert b values from int8 to int32
+ * -# Compute the int32 matrix product of the resulting a * b and store the result as int32
+ *
+ */
+class NEGEMMLowpMatrixMultiplyKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMLowpMatrixMultiplyKernel";
+ }
+ /** Constructor */
+ NEGEMMLowpMatrixMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpMatrixMultiplyKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two
+ * kernels change the layout of the original matrices to be more cache-friendly.
+ *
+ * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor containing the transposed1xW Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
+ */
+ void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyKernel
+ *
+ * @param[in] input0 Input tensor info containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor info containing the transposed Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
+ * @param[in] output Output tensor info to store the result of matrix multiplication. Data type supported: S32
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input0;
+ const ITensor *_input1;
+ ITensor *_output;
+ bool _slide_matrix_b;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H*/
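// Illustrative sketch (not part of the patch): the arithmetic described above for the low
// precision matrix product, with 8-bit inputs widened to 32 bits before accumulating. Plain
// row-major matrices are used here; the real kernel consumes the interleaved/transposed
// layouts produced by NEGEMMInterleave4x4Kernel and NEGEMMTranspose1xWKernel.
#include <cstdint>

static void gemmlowp_reference(const int8_t *a, const int8_t *b, int32_t *c, int M, int N, int K)
{
    for(int i = 0; i < M; ++i)
    {
        for(int j = 0; j < N; ++j)
        {
            int32_t acc = 0;
            for(int k = 0; k < K; ++k)
            {
                acc += static_cast<int32_t>(a[i * K + k]) * static_cast<int32_t>(b[k * N + j]);
            }
            c[i * N + j] = acc;
        }
    }
}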
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
+#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
+ *
+ * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
+ * and adds to it the offset contribution of matrix A and matrix B in-place.
+ *
+ * The final result is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (vector_sum_col[k] * a_offset) +
+ * (vector_sum_row[i] * b_offset) +
+ * (a_offset * b_offset * k)
+ *
+ */
+class NEGEMMLowpOffsetContributionKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMLowpOffsetContributionKernel";
+ }
+ /** Constructor */
+ NEGEMMLowpOffsetContributionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpOffsetContributionKernel(const NEGEMMLowpOffsetContributionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpOffsetContributionKernel &operator=(const NEGEMMLowpOffsetContributionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpOffsetContributionKernel(NEGEMMLowpOffsetContributionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpOffsetContributionKernel &operator=(NEGEMMLowpOffsetContributionKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpOffsetContributionKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in, out] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] k Number of matrix A columns or Matrix B rows
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ */
+ void configure(ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionKernel
+ *
+ * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_vector_sum_col;
+ const ITensor *_vector_sum_row;
+ ITensor *_mm_result;
+ int32_t _a_offset;
+ int32_t _b_offset;
+ int32_t _k_offset;
+ bool _slide_vector_sum_col;
+};
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */
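// Illustrative sketch (not part of the patch): the in-place update spelled out in the class
// documentation, applied element by element to a plain row-major int32 matrix. The
// vector_sum_col and vector_sum_row arrays play the same role as the documented parameters;
// either contribution can be skipped when the corresponding offset is zero.
#include <cstdint>

static void offset_contribution_reference(int32_t *mm_result, const int32_t *vector_sum_col, const int32_t *vector_sum_row,
                                          int M, int N, int32_t k, int32_t a_offset, int32_t b_offset)
{
    for(int i = 0; i < M; ++i)
    {
        for(int j = 0; j < N; ++j)
        {
            int32_t v = mm_result[i * N + j];
            v += vector_sum_col[j] * a_offset; // column sums of B scaled by A's offset
            v += vector_sum_row[i] * b_offset; // row sums of A scaled by B's offset
            v += a_offset * b_offset * k;      // constant term
            mm_result[i * N + j] = v;
        }
    }
}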
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
+#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel.
+ *
+ * The computation is performed in-place
+ *
+ * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
+ * and adds to it the offset contribution of matrix A and matrix B in-place.
+ *
+ * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint for Uint8.
+ * The output stage can perform either QuantizeDownInt32ToInt8Scale or QuantizeDownInt32ToInt8ScaleByFixedPoint for Int8.
+ *
+ * For QuantizeDownInt32ToUint8Scale/QuantizeDownInt32ToInt8Scale the final result is:
+ *
+ * ((mm_result'[i][k] + result_offset) * result_mult_int) >> result_shift
+ *
+ * For QuantizeDownInt32ToUint8ScaleByFixedPoint/QuantizeDownInt32ToInt8ScaleByFixedPoint the final result is:
+ *
+ * (FixedPointMul(mm_result'[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
+ *
+ * where FixedPointMul(x, y) is the nearest integer to the following
+ * mathematical expression, evaluated without overflow or intermediate rounding:
+ *
+ * (x * y) / 2^31
+ *
+ * and mm_result'[i][k] = mm_result[i][k] +
+ * (vector_sum_col[k] * a_offset) +
+ * (vector_sum_row[i] * b_offset) +
+ * (a_offset * b_offset * k)
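+ *
+ * A scalar sketch of the fixed-point variant of the output stage (illustrative only; it follows the
+ * description above and does not reproduce every saturation corner case of the kernel; all names are placeholders):
+ *
+ * @code{.cpp}
+ * #include <cstdint>
+ *
+ * // Nearest integer to (x * y) / 2^31, computed in 64-bit to avoid overflow
+ * inline int32_t fixed_point_mul(int32_t x, int32_t y)
+ * {
+ *     const int64_t prod = static_cast<int64_t>(x) * static_cast<int64_t>(y);
+ *     return static_cast<int32_t>((prod + (int64_t(1) << 30)) >> 31);
+ * }
+ *
+ * inline int32_t output_stage_fixedpoint_ref(int32_t mm_result_prime, int32_t result_fixedpoint_multiplier, int result_shift, int32_t result_offset_after_shift)
+ * {
+ *     return (fixed_point_mul(mm_result_prime, result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift;
+ * }
+ * @endcode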
+ */
+class NEGEMMLowpOffsetContributionOutputStageKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMLowpOffsetContributionOutputStageKernel";
+ }
+ /** Constructor */
+ NEGEMMLowpOffsetContributionOutputStageKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpOffsetContributionOutputStageKernel(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpOffsetContributionOutputStageKernel &operator=(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpOffsetContributionOutputStageKernel(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpOffsetContributionOutputStageKernel &operator=(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpOffsetContributionOutputStageKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+     * @param[in]  vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+     *                            Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
+ * @param[out] output Output tensor containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] k Number of matrix A columns or Matrix B rows
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters.
+ */
+ void configure(const ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, const ITensor *bias, ITensor *output, int32_t k, int32_t a_offset, int32_t b_offset,
+ GEMMLowpOutputStageInfo output_stage);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionOutputStageKernel
+ *
+ * @param[in] mm_result Input tensor info containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in] vector_sum_col Tensor info for the input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Tensor info for the input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
+ * @param[in] output Output tensor info containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset,
+ int32_t b_offset,
+ GEMMLowpOutputStageInfo output_stage);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_vector_sum_col;
+ const ITensor *_vector_sum_row;
+ const ITensor *_bias;
+ const ITensor *_mm_result;
+ ITensor *_output;
+ int32_t _a_offset;
+ int32_t _b_offset;
+ int32_t _k_offset;
+ bool _slide_vector_sum_col;
+ GEMMLowpOutputStageInfo _output_stage;
+};
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
+#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
+ * The following computations will be performed by the kernel:
+ *
+ * -# Add offset terms to final result
+ * -# Multiply each entry of result by result_mult_int
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Shift the int32 accumulator by result_shift
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ *    - to the [0..255] range and cast to QASYMM8.
+ *    - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
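+ * A scalar sketch of the steps listed above (illustrative only; names are placeholders and the QASYMM8 path is shown):
+ *
+ * @code{.cpp}
+ * #include <algorithm>
+ * #include <cstdint>
+ *
+ * uint8_t quantize_down_scale_ref(int32_t acc, int32_t bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min_bound, int32_t max_bound)
+ * {
+ *     int32_t v = (acc + result_offset) * result_mult_int; // add offset term, multiply by result_mult_int
+ *     v += bias;                                           // optional bias contribution (skip when no bias tensor is given)
+ *     v >>= result_shift;                                  // shift the accumulator down
+ *     v = std::max(min_bound, std::min(max_bound, v));     // clamp between the requested bounds
+ *     v = std::max(0, std::min(255, v));                   // clamp to the QASYMM8 range before the final cast
+ *     return static_cast<uint8_t>(v);
+ * }
+ * @endcode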
+ */
+class NEGEMMLowpQuantizeDownInt32ScaleKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMLowpQuantizeDownInt32ScaleKernel";
+ }
+ /** Constructor */
+ NEGEMMLowpQuantizeDownInt32ScaleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpQuantizeDownInt32ScaleKernel(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpQuantizeDownInt32ScaleKernel(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpQuantizeDownInt32ScaleKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  output_stage GEMMLowp output stage metadata.
+ */
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo *output_stage);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ScaleKernel
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[in] output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in] output_stage GEMMLowp output stage metadata.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run the NEGEMMLowpQuantizeDownInt32ScaleKernel
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T>
+ void run(const Window &window);
+
+ /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ScaleKernel functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ScaleKernel::*)(const Window &window);
+
+ QuantizeDownFunctionPtr _func;
+ const ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
+ const GEMMLowpOutputStageInfo *_output_stage;
+ bool _is_bounded_relu;
+};
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H
+#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
+ *
+ * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value.
+ * The following computations will be performed by the kernel:
+ *
+ * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16.
+ *
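+ * A scalar sketch of the rounding right shift mentioned above (illustrative only; placeholder names,
+ * shown here for the QSYMM16 path with a symmetric [-32768, 32767] clamp):
+ *
+ * @code{.cpp}
+ * #include <algorithm>
+ * #include <cstdint>
+ *
+ * // Round-to-nearest division of x by 2^shift (ties rounded up)
+ * inline int32_t rounding_divide_by_pow2(int32_t x, int shift)
+ * {
+ *     const int32_t rounding = (shift > 0) ? (1 << (shift - 1)) : 0;
+ *     return (x + rounding) >> shift;
+ * }
+ *
+ * inline int16_t to_qsymm16_ref(int32_t acc_after_fixedpoint_mul, int shift)
+ * {
+ *     const int32_t v = rounding_divide_by_pow2(acc_after_fixedpoint_mul, shift);
+ *     return static_cast<int16_t>(std::max(-32768, std::min(32767, v)));
+ * }
+ * @endcode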
+ */
+class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel";
+ }
+ /** Constructor */
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: QSYMM16
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+ * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ */
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
+ *
+ * @param[in] input Input tensor info. Data type supported: S32
+ * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[in] output Output tensor info. Data type supported: QSYMM16
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <bool is_bounded_relu>
+ void run(const Window &window);
+
+ /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::*)(const Window &window);
+
+ QuantizeDownFunctionPtr _func;
+ const ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
+ int _result_fixedpoint_multiplier;
+ int _result_shift;
+ int _min;
+ int _max;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H
+#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value.
+ * The following computations will be performed by the kernel:
+ *
+ * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ */
+class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel";
+ }
+ /** Constructor */
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+ * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
+ * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ */
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED,
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <bool is_bounded_relu>
+ void run(const Window &window);
+
+ /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::*)(const Window &window);
+
+ QuantizeDownFunctionPtr _func;
+ const ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
+ int _result_fixedpoint_multiplier;
+ int _result_shift;
+ int _result_offset_after_shift;
+ int _min;
+ int _max;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H
+#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
+ *
+ * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value.
+ * The following computations will be performed by the kernel:
+ *
+ * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
+ *
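+ * A minimal usage sketch (illustrative only; it assumes `acc` is an already allocated S32 tensor holding the
+ * GEMMLowp accumulators, `dst` an allocated QASYMM8 tensor, and the numeric requantisation values are arbitrary examples):
+ *
+ * @code{.cpp}
+ * const int result_fixedpoint_multiplier = 1073741824; // example value computed elsewhere
+ * const int result_shift                 = 8;          // example value computed elsewhere
+ * const int result_offset_after_shift    = 128;        // example value computed elsewhere
+ *
+ * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel quantize_down;
+ * // The bias tensor is optional and may be nullptr; min/max keep their default values here
+ * quantize_down.configure(&acc, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift);
+ * NEScheduler::get().schedule(&quantize_down, Window::DimY);
+ * @endcode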
+ */
+class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel";
+ }
+ /** Constructor */
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: QASYMM8
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+ * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
+ * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ */
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[in] output Output tensor. Data type supported: QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run the NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <bool is_bounded_relu>
+ void run(const Window &window);
+
+ /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::*)(const Window &window);
+
+ QuantizeDownFunctionPtr _func;
+ const ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
+ int _result_fixedpoint_multiplier;
+ int _result_shift;
+ int _result_offset_after_shift;
+ int _min;
+ int _max;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/KernelDescriptors.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMLOWPREDUCTIONKERNEL_H
+#define ARM_COMPUTE_NEGEMMLOWPREDUCTIONKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+struct GEMMLowpReductionKernelInfo;
+
+/** Common interface for all NEON reduction kernels */
+class INEGEMMLowpReductionKernel : public INEKernel
+{
+public:
+ /** Constructor */
+ INEGEMMLowpReductionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ INEGEMMLowpReductionKernel(const INEGEMMLowpReductionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ INEGEMMLowpReductionKernel &operator=(const INEGEMMLowpReductionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ INEGEMMLowpReductionKernel(INEGEMMLowpReductionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ INEGEMMLowpReductionKernel &operator=(INEGEMMLowpReductionKernel &&) = default;
+ /** Default destructor */
+ virtual ~INEGEMMLowpReductionKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
+ * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+     *                    - mul_by_scalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
+
+protected:
+ const ITensor *_input;
+ ITensor *_output;
+ int32_t _k;
+ int32_t _scalar;
+ bool _mul_by_scalar;
+};
+
+/** NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
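+ *
+ * A scalar sketch of the reduction (illustrative only; placeholder names, a non-reshaped QASYMM8 input,
+ * and the optional scaling described by the kernel metadata):
+ *
+ * @code{.cpp}
+ * #include <cstdint>
+ *
+ * void matrix_a_reduction_ref(const uint8_t *mtx_a, int32_t *vector_sum_row, int rows, int32_t k, int32_t scalar, bool mul_by_scalar)
+ * {
+ *     for(int i = 0; i < rows; ++i)
+ *     {
+ *         int32_t sum = 0;
+ *         for(int32_t j = 0; j < k; ++j)
+ *         {
+ *             sum += static_cast<int32_t>(mtx_a[i * k + j]); // accumulate every entry of row i
+ *         }
+ *         vector_sum_row[i] = mul_by_scalar ? sum * scalar : sum;
+ *     }
+ * }
+ * @endcode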
+ */
+class NEGEMMLowpMatrixAReductionKernel : public INEGEMMLowpReductionKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMLowpMatrixAReductionKernel";
+ }
+ /** Default constructor */
+ NEGEMMLowpMatrixAReductionKernel() = default;
+ /** Prevent instances of this class from being copied */
+ NEGEMMLowpMatrixAReductionKernel(const NEGEMMLowpMatrixAReductionKernel &) = delete;
+ /** Prevent instances of this class from being copied */
+ NEGEMMLowpMatrixAReductionKernel &operator=(const NEGEMMLowpMatrixAReductionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpMatrixAReductionKernel(NEGEMMLowpMatrixAReductionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpMatrixAReductionKernel &operator=(NEGEMMLowpMatrixAReductionKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpMatrixAReductionKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
+ * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k (num_mtx_a_cols) Number of matrix A columns
+ * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4
+ * - scalar Scalar value to multiply each reduced row by.
+     *                            - mul_by_scalar True if each reduced row must be multiplied by a scalar value.
+ */
+ void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel
+ *
+ * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
+ * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k (num_mtx_a_cols) Number of matrix A columns
+ * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4
+ * - scalar Scalar value to multiply each reduced row by.
+     *                            - mul_by_scalar True if each reduced row must be multiplied by a scalar value.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Execution of the reduction kernel specialized on the input type
+ *
+ * @param[in] window Execution window
+ */
+ template <typename T>
+ void run_internal(const Window &window);
+};
+
+/** NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ */
+class NEGEMMLowpMatrixBReductionKernel : public INEGEMMLowpReductionKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMLowpMatrixBReductionKernel";
+ }
+ /** Default constructor */
+ NEGEMMLowpMatrixBReductionKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpMatrixBReductionKernel(const NEGEMMLowpMatrixBReductionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpMatrixBReductionKernel &operator=(const NEGEMMLowpMatrixBReductionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpMatrixBReductionKernel(NEGEMMLowpMatrixBReductionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMLowpMatrixBReductionKernel &operator=(NEGEMMLowpMatrixBReductionKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMLowpMatrixBReductionKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+     * @param[in]  mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
+ * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k (num_mtx_b_rows) Number of matrix B rows.
+ * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW.
+ * - scalar Scalar value to multiply each reduced row by.
+     *                            - mul_by_scalar True if each reduced row must be multiplied by a scalar value.
+ */
+ void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel
+ *
+     * @param[in] mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
+ * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k (num_mtx_b_rows) Number of matrix B rows.
+ * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW.
+ * - scalar Scalar value to multiply each reduced row by.
+ * - mul_byscalar True if each reduced row must be multiplied by a scalar value.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Execution of the reduction kernel specialized on the input type
+ *
+ * @param[in] window Execution window
+ * @param[in] info Thread-related information
+ */
+ template <typename T>
+ void run_internal(const Window &window, const ThreadInfo &info);
+};
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_NEGEMMLOWPREDUCTIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H
+#define ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta:
+ *
+ * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size
+ *
+ * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. In case this kernel is used for finalizing GEMM result, we have:
+ * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel
+ * - MTX_1 = C
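+ *
+ * A scalar sketch of the in-place update (illustrative only; placeholder names, F32 shown):
+ *
+ * @code{.cpp}
+ * void matrix_addition_ref(const float *mtx_1, float *mtx_out, int num_elements, float beta)
+ * {
+ *     for(int i = 0; i < num_elements; ++i)
+ *     {
+ *         // MTX_OUT already holds MTX_0; add the beta-weighted second matrix in-place
+ *         mtx_out[i] += beta * mtx_1[i];
+ *     }
+ * }
+ * @endcode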
+ */
+class NEGEMMMatrixAdditionKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMMatrixAdditionKernel";
+ }
+ /** Constructor */
+ NEGEMMMatrixAdditionKernel();
+ /** Prevent instances of this class from being copied */
+ NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete;
+ /** Prevent instances of this class from being copied */
+ NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMMatrixAdditionKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @note The input and output tensor must have the same dimensions
+ *
+ * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32
+ * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
+ * @param[in] beta Weight of matrix C
+ */
+ void configure(const ITensor *input, ITensor *output, float beta);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel.
+ *
+ * @note The input and output tensor must have the same dimensions
+ *
+ * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32
+ * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input.
+ * @param[in] beta Weight of matrix C
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the matrix addition functions
+ *
+ * @param[in] input An input tensor. Data types supported: F16/F32
+ * @param[out] output The output tensor. Data type supported: same as @p input
+ * @param[in] window Region on which to execute the kernel.
+ * @param[in] beta Weight of matrix C
+ */
+ using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta);
+ /** Matrix addition function to use for the particular tensor types passed to configure() */
+ MatrixAdditionFunction *_func;
+ float _beta;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H
+#define ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication
+ *
+ * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel" and @ref NEGEMMTranspose1xWKernel
+ * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
+ *
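+ * A scalar sketch of the computation on non-reshaped inputs (illustrative only; placeholder names, F32 shown;
+ * the actual kernel operates on the interleaved/transposed layouts described above):
+ *
+ * @code{.cpp}
+ * void gemm_alpha_ref(const float *a, const float *b, float *out, int m, int n, int k, float alpha)
+ * {
+ *     for(int i = 0; i < m; ++i)
+ *     {
+ *         for(int j = 0; j < n; ++j)
+ *         {
+ *             float acc = 0.f;
+ *             for(int p = 0; p < k; ++p)
+ *             {
+ *                 acc += a[i * k + p] * b[p * n + j];
+ *             }
+ *             out[i * n + j] = alpha * acc; // every element is scaled by alpha after the multiplication
+ *         }
+ *     }
+ * }
+ * @endcode
+ *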
+ */
+class NEGEMMMatrixMultiplyKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMMatrixMultiplyKernel";
+ }
+ /** Constructor */
+ NEGEMMMatrixMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
+ * These two kernels change the layout of the original matrices to be more cache-friendly.
+ *
+ * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+ * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
+ * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
+     * @param[in]  reshape_info   (Optional) GEMM reshape info. If is_interleaved = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+ */
+ void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel
+ *
+ * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+ * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
+ * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
+ * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
+     * @param[in] reshape_info   (Optional) GEMM reshape info. If is_interleaved = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input0;
+ const ITensor *_input1;
+ ITensor *_output;
+ float _alpha;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H*/
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "src/core/AccessWindowStatic.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H
+#define ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
+ *
+ * The following is an example of how the 1xW transposition works when the input data type is F32
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * The following is an example of how the 1xW transposition works when the input data type is F16
+ *
+ * @f[
+ * \left( \begin{array}{cccccccc}
+ * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\
+ * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\
+ * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\
+ * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\
+ * \end{array} \right)
+ * @f]
+ *
+ * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
+ *
+ */
+class NEGEMMTranspose1xWKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGEMMTranspose1xWKernel";
+ }
+ /** Constructor */
+ NEGEMMTranspose1xWKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMTranspose1xWKernel(const NEGEMMTranspose1xWKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMTranspose1xWKernel &operator=(const NEGEMMTranspose1xWKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMTranspose1xWKernel(NEGEMMTranspose1xWKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMTranspose1xWKernel &operator=(NEGEMMTranspose1xWKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMTranspose1xWKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: same as @p input.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] output Output tensor info. Data type supported: same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H */
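
A standalone scalar sketch of the reshape documented above, assuming F32 data and row-major storage. It reproduces the documented output shape [ height * W, ceil(width / W) ] with W = 16 / element size, and for the 4x4 F32 example yields exactly the single output row shown in the class comment:

```cpp
#include <cstddef>
#include <vector>

// Reference sketch of the 1xW reshape (not the NEON code). Each 1xW chunk of a
// source row is written as a contiguous run of W elements in the output.
std::vector<float> transpose_1xw_reference(const std::vector<float> &src, std::size_t width, std::size_t height)
{
    const std::size_t w_chunk    = 16 / sizeof(float); // 4 for F32
    const std::size_t out_width  = height * w_chunk;
    const std::size_t out_height = (width + w_chunk - 1) / w_chunk; // ceil(width / W)
    std::vector<float> dst(out_width * out_height, 0.0f);

    for(std::size_t y = 0; y < height; ++y)
    {
        for(std::size_t x = 0; x < width; ++x)
        {
            const std::size_t chunk  = x / w_chunk; // which output row
            const std::size_t offset = x % w_chunk; // position inside the 1xW chunk
            dst[chunk * out_width + y * w_chunk + offset] = src[y * width + x];
        }
    }
    return dst;
}
```
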
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGatherKernel.h"
+#include "src/core/NEON/kernels/NEGatherKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_NEGATHERKERNEL_H
+#define ARM_COMPUTE_NEGATHERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Kernel to perform the gather operation on NEON */
+class NEGatherKernel : public INEKernel
+{
+public:
+ /** Default constructor. */
+ NEGatherKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEGatherKernel(const NEGatherKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEGatherKernel &operator=(const NEGatherKernel &) = delete;
+ /** Allow instances of this class to be moved. */
+ NEGatherKernel(NEGatherKernel &&) = default;
+ /** Allow instances of this class to be moved. */
+ NEGatherKernel &operator=(NEGatherKernel &&) = default;
+ /** Default destructor */
+ ~NEGatherKernel() = default;
+
+ /** Name of the kernel
+ *
+ * @return Kernel name
+ */
+ const char *name() const override
+ {
+ return "NEGatherKernel";
+ }
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
+ */
+ void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel
+ *
+ * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Implementation of the gather operation for 0 axis.
+ *
+ * For gather on the 0 axis, an element-by-element copy is performed.
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window())
+ * @param[in] info Info about executing thread and CPU.
+ */
+ template <typename U>
+ void gather_0_axis(const Window &window, const ThreadInfo &info);
+
+ /** Implementation of the gather operation.
+ *
+ * For axis >= 1, a row-wise copy takes place.
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window())
+ * @param[in] info Info about executing thread and CPU.
+ */
+ template <typename U>
+ void gather_n_axis(const Window &window, const ThreadInfo &info);
+
+ using kernel_ptr = void (NEGatherKernel::*)(const Window &window, const ThreadInfo &info);
+
+ const ITensor *_input;
+ const ITensor *_indices;
+ int _axis;
+ ITensor *_output;
+ kernel_ptr _func;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEGATHERKERNEL_H */
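
A minimal scalar sketch of the documented gather semantics for a rank-2 input and axis 0 (the innermost, x axis), assuming row-major storage with x innermost. The element-by-element copy below matches the behaviour described for gather_0_axis():

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Reference gather along the x axis: output(x, y) = input(indices[x], y).
// Every index must lie in [0, in_w).
std::vector<float> gather_x_axis_reference(const std::vector<float> &input, std::size_t in_w, std::size_t in_h,
                                           const std::vector<uint32_t> &indices)
{
    const std::size_t out_w = indices.size();
    std::vector<float> output(out_w * in_h);
    for(std::size_t y = 0; y < in_h; ++y)
    {
        for(std::size_t x = 0; x < out_w; ++x)
        {
            output[y * out_w + x] = input[y * in_w + indices[x]];
        }
    }
    return output;
}
```
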
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h"
+#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H
+#define ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a Gaussian 3x3 filter */
+class NEGaussian3x3Kernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGaussian3x3Kernel";
+ }
+ /** Constructor */
+ NEGaussian3x3Kernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussian3x3Kernel(const NEGaussian3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussian3x3Kernel &operator=(const NEGaussian3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGaussian3x3Kernel(NEGaussian3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGaussian3x3Kernel &operator=(NEGaussian3x3Kernel &&) = default;
+ /** Default destructor */
+ ~NEGaussian3x3Kernel() = default;
+ /** Set the source, destination and border mode of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
input, output);
}
+NEGaussian5x5VertKernel::NEGaussian5x5VertKernel()
+{
+}
+
BorderSize NEGaussian5x5VertKernel::border_size() const
{
return BorderSize{ 2, 0 };
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H
+#define ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */
+class NEGaussian5x5HorKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGaussian5x5HorKernel";
+ }
+ /** Default constructor */
+ NEGaussian5x5HorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussian5x5HorKernel(NEGaussian5x5HorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussian5x5HorKernel &operator=(NEGaussian5x5HorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGaussian5x5HorKernel(NEGaussian5x5HorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGaussian5x5HorKernel &operator=(NEGaussian5x5HorKernel &&) = default;
+ /** Default destructor */
+ ~NEGaussian5x5HorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output Destination tensor. Data type supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ BorderSize _border_size;
+};
+
+/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */
+class NEGaussian5x5VertKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGaussian5x5VertKernel";
+ }
+ /** Default constructor */
+ NEGaussian5x5VertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussian5x5VertKernel(NEGaussian5x5VertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussian5x5VertKernel &operator=(NEGaussian5x5VertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGaussian5x5VertKernel(NEGaussian5x5VertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGaussian5x5VertKernel &operator=(NEGaussian5x5VertKernel &&) = default;
+ /** Default destructor */
+ ~NEGaussian5x5VertKernel() = default;
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: S16.
+ * @param[out] output Destination tensor. Data type supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H */
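
The two classes above implement the filter as a separable pair of passes (U8 to S16 horizontally, then S16 to U8 vertically). Below is a scalar sketch of that structure; the binomial taps {1, 4, 6, 4, 1} and the combined 1/256 normalisation applied in the vertical pass are assumptions about the usual 5x5 Gaussian, and border handling is omitted:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Two-pass, separable 5x5 Gaussian sketch (not the NEON code).
void gaussian5x5_reference(const std::vector<uint8_t> &src, std::vector<uint8_t> &dst, int width, int height)
{
    static const int taps[5] = { 1, 4, 6, 4, 1 };
    std::vector<int16_t> tmp(src.size(), 0); // horizontal pass result (S16)
    dst.assign(src.size(), 0);

    // Horizontal pass: U8 -> S16 (max sum 16 * 255 fits in int16_t)
    for(int y = 0; y < height; ++y)
    {
        for(int x = 2; x < width - 2; ++x)
        {
            int sum = 0;
            for(int k = -2; k <= 2; ++k)
            {
                sum += taps[k + 2] * src[y * width + x + k];
            }
            tmp[y * width + x] = static_cast<int16_t>(sum);
        }
    }
    // Vertical pass: S16 -> U8, normalising by 16 * 16 = 256
    for(int y = 2; y < height - 2; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            int sum = 0;
            for(int k = -2; k <= 2; ++k)
            {
                sum += taps[k + 2] * tmp[(y + k) * width + x];
            }
            dst[y * width + x] = static_cast<uint8_t>(std::min(sum / 256, 255));
        }
    }
}
```
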
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
+#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H
+#define ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a GaussianPyramid (horizontal pass) */
+class NEGaussianPyramidHorKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGaussianPyramidHorKernel";
+ }
+ /** Default constructor */
+ NEGaussianPyramidHorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default;
+ /** Default destructor */
+ ~NEGaussianPyramidHorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output Destination tensor. Output should have half the input width. Data type supported: S16.
+ */
+ void configure(const ITensor *input, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ int _l2_load_offset;
+};
+
+/** NEON kernel to perform a GaussianPyramid (vertical pass) */
+class NEGaussianPyramidVertKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEGaussianPyramidVertKernel";
+ }
+ /** Default constructor */
+ NEGaussianPyramidVertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default;
+ /** Default destructor */
+ ~NEGaussianPyramidVertKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: S16.
+ * @param[out] output Destination tensor. Output should have half the input height. Data type supported: U8.
+ */
+ void configure(const ITensor *input, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ int _t2_load_offset;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
+#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H
+#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for Compute All Anchors kernel */
+class NEComputeAllAnchorsKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEComputeAllAnchorsKernel";
+ }
+
+ /** Default constructor */
+ NEComputeAllAnchorsKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEComputeAllAnchorsKernel(const NEComputeAllAnchorsKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEComputeAllAnchorsKernel &operator=(const NEComputeAllAnchorsKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEComputeAllAnchorsKernel(NEComputeAllAnchorsKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEComputeAllAnchorsKernel &operator=(NEComputeAllAnchorsKernel &&) = default;
+ /** Default destructor */
+ ~NEComputeAllAnchorsKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
+ * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
+ * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+ *
+ */
+ void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel
+ *
+ * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
+ * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
+ * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ template <typename T>
+ void internal_run(const Window &window);
+
+ const ITensor *_anchors;
+ ITensor *_all_anchors;
+ ComputeAnchorsInfo _anchors_info;
+};
+} // namespace arm_compute
+#endif // ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H
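
A standalone sketch of the anchor broadcast this kernel is documented to perform: A base anchors of the form (x1, y1, x2, y2) are replicated over an H x W feature map, giving the (4, H*W*A) output. Shifting each feature-map position by (x * stride, y * stride) is the usual scheme; the stride normally comes from ComputeAnchorsInfo and is taken here as a plain parameter:

```cpp
#include <cstddef>
#include <vector>

// Reference anchor broadcast: produces 4 * H * W * A values, anchors stored
// as consecutive (x1, y1, x2, y2) quadruples.
std::vector<float> compute_all_anchors_reference(const std::vector<float> &anchors, std::size_t num_anchors,
                                                 std::size_t feat_w, std::size_t feat_h, float stride)
{
    std::vector<float> all_anchors;
    all_anchors.reserve(4 * feat_w * feat_h * num_anchors);
    for(std::size_t y = 0; y < feat_h; ++y)
    {
        for(std::size_t x = 0; x < feat_w; ++x)
        {
            const float shift_x = x * stride;
            const float shift_y = y * stride;
            for(std::size_t a = 0; a < num_anchors; ++a)
            {
                all_anchors.push_back(anchors[4 * a + 0] + shift_x); // x1
                all_anchors.push_back(anchors[4 * a + 1] + shift_y); // y1
                all_anchors.push_back(anchors[4 * a + 2] + shift_x); // x2
                all_anchors.push_back(anchors[4 * a + 3] + shift_y); // y2
            }
        }
    }
    return all_anchors;
}
```
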
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
+#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/HOGInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H
+#define ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H
+
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/core/Size2D.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform HOG Orientation Binning */
+class NEHOGOrientationBinningKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEHOGOrientationBinningKernel";
+ }
+ /** Default constructor */
+ NEHOGOrientationBinningKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default;
+ /** Default destructor */
+ ~NEHOGOrientationBinningKernel() = default;
+
+ /** Initialise the kernel's inputs, output and HOG's metadata
+ *
+ * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
+ * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
+ * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised orientation binning functions
+ *
+ * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor
+ * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor
+ * @param[out] output_ptr Pointer to the output cell of hog space tensor
+ * @param[in] mag_stride Stride of the magnitude tensor
+ * @param[in] phase_stride Stride of the phase tensor
+ * @param[in] cell_width Width of the cell
+ * @param[in] cell_height Height of the cell
+ * @param[in] num_bins Number of bins for each cell
+ * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index
+ */
+ using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width,
+ size_t cell_height, size_t num_bins, float phase_scale);
+ /** Orientation binning function to use for the particular cell width passed to configure() */
+ OrientBinFunc *_func;
+ const ITensor *_input_magnitude;
+ const ITensor *_input_phase;
+ ITensor *_output;
+ size_t _cell_width;
+ size_t _cell_height;
+ size_t _num_bins;
+ float _phase_scale;
+};
+
+/** NEON kernel to perform HOG block normalization */
+class NEHOGBlockNormalizationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEHOGBlockNormalizationKernel";
+ }
+ /** Default constructor */
+ NEHOGBlockNormalizationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default;
+ /** Default destructor */
+ ~NEHOGBlockNormalizationKernel() = default;
+
+ /** Initialise the kernel's input, output and HOG's metadata
+ *
+ * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised block normalization functions
+ *
+ * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor
+ * @param[out] output_ptr Pointer to the output block of the hog normalized space
+ * @param[in] input_stride Stride of the input hog space tensor
+ * @param[in] num_cells_per_block_height Number of cells per block along the Y direction
+ * @param[in] num_bins_block_x Number of bins per block along the X direction
+ * @param[in] num_bins_block Number of total bins per block
+ * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization
+ */
+ using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block,
+ float l2_hyst_threshold);
+ /** Block normalization function to use for the particular normalization type passed to configure() */
+ BlockNormFunc *_func;
+ const ITensor *_input;
+ ITensor *_output;
+ Size2D _num_cells_per_block;
+ Size2D _num_cells_per_block_stride;
+ size_t _num_bins;
+ float _l2_hyst_threshold;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H */
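
A minimal scalar sketch of the orientation-binning step for one cell, following the OrientBinFunc signature documented above: each pixel votes its gradient magnitude into the histogram bin selected from its phase via phase_scale. Bin interpolation and the exact rounding used by the NEON kernel are omitted; this is an illustration, not the library code:

```cpp
#include <cstddef>
#include <cstdint>

// Accumulate one cell's orientation histogram: bin = phase * phase_scale,
// weighted by the gradient magnitude at that pixel.
void hog_orientation_binning_cell(const int16_t *mag_row_ptr, const uint8_t *phase_row_ptr, float *output_ptr,
                                  std::size_t mag_stride, std::size_t phase_stride,
                                  std::size_t cell_width, std::size_t cell_height,
                                  std::size_t num_bins, float phase_scale)
{
    for(std::size_t b = 0; b < num_bins; ++b)
    {
        output_ptr[b] = 0.0f;
    }
    for(std::size_t y = 0; y < cell_height; ++y)
    {
        for(std::size_t x = 0; x < cell_width; ++x)
        {
            const float phase = static_cast<float>(phase_row_ptr[y * phase_stride + x]);
            std::size_t bin   = static_cast<std::size_t>(phase * phase_scale);
            bin               = (bin < num_bins) ? bin : num_bins - 1; // clamp to valid range
            output_ptr[bin] += static_cast<float>(mag_row_ptr[y * mag_stride + x]);
        }
    }
}
```
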
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
+#include "src/core/NEON/kernels/NEHOGDetectorKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/HOGInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEHOGDETECTORKERNEL_H
+#define ARM_COMPUTE_NEHOGDETECTORKERNEL_H
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/IHOG.h"
+#include "src/core/NEON/INEKernel.h"
+#include "support/Mutex.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform HOG detector kernel using linear SVM */
+class NEHOGDetectorKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEHOGDetectorKernel";
+ }
+ /** Default constructor */
+ NEHOGDetectorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHOGDetectorKernel(NEHOGDetectorKernel &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = delete;
+ /** Default destructor */
+ ~NEHOGDetectorKernel() = default;
+
+ /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
+ *
+ * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel
+ * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
+ * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+ * It must be a multiple of hog->info()->block_stride()
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
+ */
+ void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ IDetectionWindowArray *_detection_windows;
+ const float *_hog_descriptor;
+ float _bias;
+ float _threshold;
+ uint16_t _idx_class;
+ size_t _num_bins_per_descriptor_x;
+ size_t _num_blocks_per_descriptor_y;
+ size_t _block_stride_width;
+ size_t _block_stride_height;
+ size_t _detection_window_width;
+ size_t _detection_window_height;
+ size_t _max_num_detection_windows;
+ arm_compute::Mutex _mutex;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEHOGDETECTORKERNEL_H */
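
A minimal sketch of the linear-SVM test applied to a single detection window, as suggested by the members above (_hog_descriptor, _bias, _threshold): the window's block-normalised descriptor is dot-multiplied with the trained SVM weights, the bias is added, and the window is reported when the score exceeds the threshold passed to configure(). Sliding the window across the image is not shown:

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Returns true when the descriptor extracted from one detection window is
// classified as the searched object by the linear SVM.
bool hog_window_is_detection(const std::vector<float> &window_descriptor,
                             const std::vector<float> &svm_weights, float bias, float threshold)
{
    float score = bias;
    const std::size_t n = std::min(window_descriptor.size(), svm_weights.size());
    for(std::size_t i = 0; i < n; ++i)
    {
        score += window_descriptor[i] * svm_weights[i];
    }
    return score > threshold;
}
```
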
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
+#include "src/core/NEON/kernels/NEHarrisCornersKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEHARRISCORNERSKERNEL_H
+#define ARM_COMPUTE_NEHARRISCORNERSKERNEL_H
+
+#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
+#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
+#include "arm_compute/core/IArray.h"
+#include "src/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Common interface for all Harris Score kernels */
+class INEHarrisScoreKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ INEHarrisScoreKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default;
+ /** Default destructor */
+ ~INEHarrisScoreKernel() = default;
+
+public:
+ /** Setup the kernel parameters
+ *
+ * @param[in] input1 Source image (gradient X). Data types supported: S16/S32
+ * @param[in] input2 Source image (gradient Y). Data types supported: same as @p input1
+ * @param[out] output Destination image (harris score). Data types supported: F32
+ * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0)
+ * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+ * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0;
+
+protected:
+ const IImage *_input1; /**< Source image - Gx component */
+ const IImage *_input2; /**< Source image - Gy component */
+ IImage *_output; /**< Source image - Harris score */
+ float _sensitivity; /**< Sensitivity value */
+ float _strength_thresh; /**< Threshold value */
+ float _norm_factor; /**< Normalization factor */
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Template NEON kernel to perform Harris Score.
+ * The implementation supports 3, 5, and 7 for the block_size
+ */
+template <int32_t block_size>
+class NEHarrisScoreKernel : public INEHarrisScoreKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEHarrisScoreKernel";
+ }
+ /** Default constructor */
+ NEHarrisScoreKernel();
+ // Inherited methods overridden:
+ void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override;
+ BorderSize border_size() const override;
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised harris score functions */
+ using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
+ float norm_factor, float sensitivity, float strength_thresh);
+ /** Harris Score function to use for the particular image types passed to configure() */
+ HarrisScoreFunction *_func;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEHARRISCORNERSKERNEL_H */
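
A scalar sketch of the Harris-Stephens response the parameters above describe: the gradients inside the block are scaled by norm_factor, accumulated into the structure tensor M, and the score det(M) - k * trace(M)^2 is kept only when it exceeds strength_thresh. This is a plain per-pixel reference, not the specialised NEON code:

```cpp
#include <cstddef>
#include <cstdint>

// gx and gy point at the centre pixel of the block in the gradient images;
// stride is the row stride (in elements) of both gradient images.
float harris_score_at(const int16_t *gx, const int16_t *gy, std::size_t stride,
                      int block_size, float norm_factor, float sensitivity, float strength_thresh)
{
    float sxx = 0.0f, syy = 0.0f, sxy = 0.0f;
    const int half = block_size / 2;
    for(int y = -half; y <= half; ++y)
    {
        for(int x = -half; x <= half; ++x)
        {
            const std::ptrdiff_t off = y * static_cast<std::ptrdiff_t>(stride) + x;
            const float ix = gx[off] * norm_factor;
            const float iy = gy[off] * norm_factor;
            sxx += ix * ix;
            syy += iy * iy;
            sxy += ix * iy;
        }
    }
    const float det   = sxx * syy - sxy * sxy;
    const float trace = sxx + syy;
    const float score = det - sensitivity * trace * trace;
    return (score > strength_thresh) ? score : 0.0f;
}
```
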
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H
+#define ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the height concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class NEHeightConcatenateLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEHeightConcatenateLayerKernel";
+ }
+ /** Default constructor */
+ NEHeightConcatenateLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEHeightConcatenateLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] height_offset The starting offset on the Y axis for the output tensor.
+ * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
+ *
+ */
+ void configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] height_offset The starting offset on the Y axis for the output tensor.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ unsigned int _height_offset;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */
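
A standalone sketch of what concatenation along the height axis means for one input, assuming row-major 2-D tensors: the whole input is copied into the output starting at row height_offset. Calling it once per input with increasing offsets builds the concatenated result:

```cpp
#include <cstddef>
#include <vector>

// Copy one input into the output at the given Y offset. The output must have
// at least height_offset + in_height rows and the same width as the input.
void concat_along_height(const std::vector<float> &input, std::size_t width, std::size_t in_height,
                         std::vector<float> &output, std::size_t height_offset)
{
    for(std::size_t y = 0; y < in_height; ++y)
    {
        for(std::size_t x = 0; x < width; ++x)
        {
            output[(height_offset + y) * width + x] = input[y * width + x];
        }
    }
}
```
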
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
+#include "src/core/NEON/kernels/NEHistogramKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEHISTOGRAMKERNEL_H
+#define ARM_COMPUTE_NEHISTOGRAMKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+#include "support/Mutex.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+class IDistribution1D;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the histogram kernel */
+class NEHistogramKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEHistogramKernel";
+ }
+ /** Default constructor */
+ NEHistogramKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHistogramKernel(const NEHistogramKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHistogramKernel &operator=(const NEHistogramKernel &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHistogramKernel(NEHistogramKernel &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEHistogramKernel &operator=(NEHistogramKernel &&) = delete;
+ /** Default destructor */
+ ~NEHistogramKernel() = default;
+
+ /** Set the input image and the distribution output.
+ *
+ * @param[in] input Source image. Data type supported: U8.
+ * @param[out] output Destination distribution.
+ * @param[in,out] local_hist Array that the threads use to save their local histograms.
+ * Its size should be equal to (number_of_threads * num_bins),
+ * and the Window::thread_id() is used to determine the part of the array
+ * used by each thread.
+ * @param[out] window_lut LUT with pre-calculated possible window values.
+ * The size of the LUT should be equal to max_range_size. It is filled
+ * during the configure stage and re-used by every run, so it can be
+ * safely shared among threads.
+ */
+ void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut);
+ /** Set the input image and the distribution output.
+ *
+ * @note Used for histogram of fixed size equal to 256
+ *
+ * @param[in] input Source image. Data type supported: U8.
+ * @param[out] output Destination distribution, which must have 256 bins.
+ */
+ void configure(const IImage *input, IDistribution1D *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Function to merge multiple partial histograms.
+ *
+ * @param[out] global_hist Pointer to the final histogram.
+ * @param[in] local_hist Pointer to the partial histograms.
+ * @param[in] bins Number of bins.
+ */
+ void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins);
+ /** Function to merge multiple minimum values of partial histograms.
+ *
+ * @param[out] global_min Pointer to the global min value.
+ * @param[in] local_min Local min value.
+ */
+ void merge_min(uint8_t *global_min, const uint8_t &local_min);
+ /** Function to perform histogram on the given window
+ *
+ * @param[in] win Region on which to execute the kernel
+ * @param[in] info Info about the executing thread
+ */
+ void histogram_U8(Window win, const ThreadInfo &info);
+ /** Function to perform histogram on the given window where histogram is
+ * of fixed size 256 without ranges and offsets.
+ *
+ * @param[in] win Region on which to execute the kernel
+ * @param[in] info Info about the executing thread
+ */
+ void histogram_fixed_U8(Window win, const ThreadInfo &info);
+ /** Pre-calculate the pixel windowing for every possible pixel
+ *
+ * Calculate (V - offset) * numBins / range where V is every possible pixel value.
+ *
+ * @note We currently support U8 images, thus possible pixel values are between 0 and 255
+ */
+ void calculate_window_lut() const;
+ /** Common signature for all the specialised Histogram functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info);
+
+ HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure()
+ const IImage *_input;
+ IDistribution1D *_output;
+ uint32_t *_local_hist;
+ uint32_t *_window_lut;
+ arm_compute::Mutex _hist_mtx;
+ static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEHISTOGRAMKERNEL_H */
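
A standalone sketch of the LUT pre-calculation described in calculate_window_lut(): for each possible U8 value V, the bin index (V - offset) * num_bins / range is computed once at configure time and re-used by every run:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Pre-calculate the histogram bin for every possible U8 pixel value.
// Values below the offset are clamped to bin 0 in this sketch.
std::vector<uint32_t> make_window_lut(uint32_t num_bins, int32_t offset, uint32_t range)
{
    constexpr int max_range_size = 256; // U8 images: values 0..255
    std::vector<uint32_t> lut(max_range_size, 0);
    for(int v = 0; v < max_range_size; ++v)
    {
        const int64_t bin = (static_cast<int64_t>(v) - offset) * num_bins / range;
        lut[v]            = static_cast<uint32_t>(std::max<int64_t>(bin, 0));
    }
    return lut;
}
```
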
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEIM2COLKERNEL_H
+#define ARM_COMPUTE_NEIM2COLKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+class Size2D;
+
+/** Interface for the im2col reshape kernel.
+ *
+ * Rearranges image blocks into columns: each convolution block is stripped out into a single column,
+ * so that a convolution can be computed as a plain matrix multiplication.
+ *
+ * For example, taking the image below and assuming 3x3 image blocks with a stride of 1, we have:
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccc}
+ * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
+ * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
+ * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
+ * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ */
+class NEIm2ColKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEIm2ColKernel";
+ }
+ /** Default constructor */
+ NEIm2ColKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIm2ColKernel(const NEIm2ColKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEIm2ColKernel(NEIm2ColKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default;
+ /** Default destructor */
+ ~NEIm2ColKernel() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
+ * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
+ * @param[out] output The output tensor. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  has_bias    In case biases are provided, expands the matrix with 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ */
+ void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
+ bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel
+ *
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
+ * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
+ * @param[in] output The output tensor. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in] has_bias    In case biases are provided, expands the matrix with 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
+ bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run im2col
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T, bool has_pads, bool is_nchw>
+ void run_im2col(const Window &window);
+
+ /** Common signature for all the specialised im2col functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window);
+
+ Im2ColFunctionPtr _func;
+ const ITensor *_input;
+ ITensor *_output;
+ std::pair<unsigned int, unsigned int> _convolved_dims;
+ PadStrideInfo _conv_info;
+ unsigned int _kernel_width;
+ unsigned int _kernel_height;
+ bool _has_bias;
+ Size2D _dilation;
+ DataLayout _data_layout;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEIM2COLKERNEL_H */
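For readers wiring the kernel up by hand, the following is a minimal configuration sketch for the 4x4 example in the class documentation (3x3 kernel, stride 1, no padding, no bias). It is only a sketch: the wrapper function, the tensor shapes and the assumption that the code is built inside the library tree (the header now lives under src/) are illustrative, not taken from the library's tests.

#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEIm2ColKernel.h"

void im2col_example()
{
    using namespace arm_compute;

    // Single-channel 4x4 input, as in the matrix example above.
    Tensor src{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U), 1, DataType::F32));
    // 3x3 blocks at stride 1 give 4 positions of 9 elements each (the 4x9 matrix above);
    // the shape is stated here only to mirror that example, configure() validates it.
    dst.allocator()->init(TensorInfo(TensorShape(9U, 4U, 1U), 1, DataType::F32));

    NEIm2ColKernel im2col;
    im2col.configure(&src, &dst, Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0), /* has_bias */ false);
    // Allocation of the tensors and scheduling of the kernel are omitted from this sketch.
}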
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+struct InstanceNormalizationLayerKernelInfo;
+
+/** Interface for performing an instance normalization */
+class NEInstanceNormalizationLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEInstanceNormalizationLayerKernel";
+ }
+ /** Default constructor */
+ NEInstanceNormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEInstanceNormalizationLayerKernel(const NEInstanceNormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEInstanceNormalizationLayerKernel &operator=(const NEInstanceNormalizationLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEInstanceNormalizationLayerKernel(NEInstanceNormalizationLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEInstanceNormalizationLayerKernel &operator=(NEInstanceNormalizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEInstanceNormalizationLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW
+ * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
+ * @param[in] info Kernel meta-data descriptor
+ */
+ void configure(ITensor *input, ITensor *output, const InstanceNormalizationLayerKernelInfo &info);
+
+    /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayerKernel.
+ *
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
+ * @param[in] info Kernel meta-data descriptor
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialized instance normalization functions
+ *
+ * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+ * @param[out] output The output tensor.
+ * @param[in] gamma The scale scalar value applied to the normalized tensor. Defaults to 1.0
+ * @param[in] beta The offset scalar value applied to the normalized tensor. Defaults to 0.0
+ * @param[in] epsilon Lower bound value for the normalization. Defaults to 1e-12
+ */
+ using NormalizationFunction = void(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window);
+
+ NormalizationFunction *_func;
+ ITensor *_input;
+ ITensor *_output;
+ float _gamma;
+ float _beta;
+ float _epsilon;
+ bool _use_mixed_precision{ true };
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */
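For reference, the gamma, beta and epsilon parameters described above enter the usual instance-normalization formula, applied over the spatial dimensions of each channel of each batch item (this states the standard operation, not the exact code path):

@f[
 y = \gamma \cdot \frac{x - \mu_{HW}}{\sqrt{\sigma^2_{HW} + \epsilon}} + \beta
@f]

where mu_HW and sigma^2_HW are the mean and variance computed over the H and W dimensions.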
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h"
+#include "src/core/NEON/kernels/NEIntegralImageKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H
+#define ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Kernel to perform an image integral on an image */
+class NEIntegralImageKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEIntegralImageKernel";
+ }
+ /** Default constructor */
+ NEIntegralImageKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIntegralImageKernel(const NEIntegralImageKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIntegralImageKernel &operator=(const NEIntegralImageKernel &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEIntegralImageKernel(NEIntegralImageKernel &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEIntegralImageKernel &operator=(NEIntegralImageKernel &&) = delete;
+ /** Default destructor */
+ ~NEIntegralImageKernel() = default;
+    /** Set the source and destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8
+ * @param[out] output Destination tensor. Data type supported: U32
+ */
+ void configure(const ITensor *input, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+ bool is_parallelisable() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H */
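The integral image follows the standard definition, which also explains the U8 input / U32 output pairing in configure(): each output element accumulates every input pixel above and to its left (inclusive), so the running sums quickly exceed what 8 or 16 bits can hold:

@f[
 \text{out}(x, y) = \sum_{i \le x} \sum_{j \le y} \text{in}(i, j)
@f]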
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
+#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H
+#define ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */
+class NEL2NormalizeLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEL2NormalizeLayerKernel";
+ }
+ /** Default constructor */
+ NEL2NormalizeLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEL2NormalizeLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16/F32.
+ * @param[in] sum Sum values tensor. Data types supported: same as @p input.
+ * Sum will have the same number of dimensions as input.
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
+ * @param[in] epsilon Lower bound value for the normalization.
+ */
+ void configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel.
+ *
+ * @param[in] input Source tensor info. Data types supported: F16/F32.
+ * @param[in] sum Sum values tensor info. Data types supported: same as @p input.
+ * Sum will have the same number of dimensions as input.
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
+ * @param[in] epsilon Lower bound value for the normalization.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ const ITensor *_sum;
+ ITensor *_output;
+ unsigned int _actual_axis;
+ float _epsilon;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H */
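Read together with the @p sum tensor produced by the preceding square-sum reduction, the epsilon parameter acts as a lower bound on the denominator. One formulation consistent with the parameter descriptions above (the exact point at which the clamp is applied is an implementation detail) is:

@f[
 \text{out}_i = \frac{\text{in}_i}{\sqrt{\max(\text{sum}, \epsilon)}}
@f]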
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
+#include "src/core/NEON/kernels/NELKTrackerKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_LKTRACKERKERNEL_H
+#define ARM_COMPUTE_LKTRACKERKERNEL_H
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+#include <utility>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for NEON Array of Internal Key Points. */
+using INELKInternalKeypointArray = IArray<NELKInternalKeypoint>;
+
+/** Interface for the Lucas-Kanade tracker kernel */
+class NELKTrackerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NELKTrackerKernel";
+ }
+ /** Default constructor */
+ NELKTrackerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELKTrackerKernel(const NELKTrackerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NELKTrackerKernel(NELKTrackerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default;
+ /** Default destructor */
+ ~NELKTrackerKernel() = default;
+
+ /** Initialise the kernel input and output
+ *
+ * @param[in] input_old Pointer to the input old tensor. Data type supported: U8
+     * @param[in]      input_new            Pointer to the input new tensor. Data type supported: U8
+ * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data type supported: S16
+ * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data type supported: S16
+ * @param[in] old_points Pointer to the IKeyPointArray storing old key points
+ * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points
+ * @param[out] new_points Pointer to the IKeyPointArray storing new key points
+ * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points
+ * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points
+ * @param[in] termination The criteria to terminate the search of each keypoint.
+ * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
+ * @param[in] epsilon The error for terminating the algorithm
+     * @param[in]      num_iterations       The maximum number of iterations before terminating the algorithm
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ * @param[in] num_levels The number of pyramid levels
+ * @param[in] pyramid_scale Scale factor used for generating the pyramid
+ */
+ void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy,
+ const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points,
+ INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal,
+ Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension,
+ size_t level, size_t num_levels, float pyramid_scale);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+    /** Initialise the array of keypoints in the provided range
+ *
+ * @param[in] start Index of first element in the keypoints array to be initialised
+     * @param[in] end   Index after the last element in the keypoints array to be initialised
+ */
+ void init_keypoints(int start, int end);
+ /** Compute the structure tensor A^T * A based on the scharr gradients I_x and I_y
+ *
+ * @param[in] keypoint Keypoint for which gradients are computed
+ * @param[out] bilinear_ix Intermediate interpolated data for X gradient
+ * @param[out] bilinear_iy Intermediate interpolated data for Y gradient
+ *
+ * @return Values A11, A12, A22
+ */
+ std::tuple<int, int, int> compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy);
+ /** Compute the vector A^T * b, i.e. -sum(I_d * I_t) for d in {x,y}
+ *
+ * @param[in] old_keypoint Old keypoint for which gradient is computed
+ * @param[in] new_keypoint New keypoint for which gradient is computed
+ * @param[in] bilinear_ix Intermediate interpolated data for X gradient
+ * @param[in] bilinear_iy Intermediate interpolated data for Y gradient
+ *
+ * @return Values b1, b2
+ */
+ std::pair<int, int> compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy);
+
+ const ITensor *_input_old;
+ const ITensor *_input_new;
+ const ITensor *_old_scharr_gx;
+ const ITensor *_old_scharr_gy;
+ IKeyPointArray *_new_points;
+ const IKeyPointArray *_new_points_estimates;
+ const IKeyPointArray *_old_points;
+ INELKInternalKeypointArray *_old_points_internal;
+ INELKInternalKeypointArray *_new_points_internal;
+ Termination _termination;
+ bool _use_initial_estimate;
+ float _pyramid_scale;
+ float _epsilon;
+ unsigned int _num_iterations;
+ int _window_dimension;
+ unsigned int _level;
+ unsigned int _num_levels;
+ ValidRegion _valid_region;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NELKTRACKERKERNEL_H */
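The two private helpers above are the two halves of the classic Lucas-Kanade update: compute_spatial_gradient_matrix() yields the structure-tensor entries A11, A12, A22 and compute_image_mismatch_vector() yields b1, b2, so each iteration solves the 2x2 system below and moves the keypoint by the resulting displacement until epsilon or num_iterations stops the search (the standard formulation, stated here for orientation):

@f[
 \left( \begin{array}{c} \delta_x \\ \delta_y \end{array} \right) =
 \left( \begin{array}{cc} A_{11} & A_{12} \\ A_{12} & A_{22} \end{array} \right)^{-1}
 \left( \begin{array}{c} b_1 \\ b_2 \end{array} \right)
@f]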
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
+#define ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to multiply each row of the first tensor with the lowest 2 dimensions of the second tensor. */
+class NELocallyConnectedMatrixMultiplyKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NELocallyConnectedMatrixMultiplyKernel";
+ }
+ /** Default constructor */
+ NELocallyConnectedMatrixMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default;
+ /** Default destructor */
+ ~NELocallyConnectedMatrixMultiplyKernel() = default;
+ /** Initialise the kernel's input and output
+ *
+ * @param[in] input0 First input tensor. Data types supported: F16, F32
+ * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ */
+ void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedMatrixMultiplyKernel
+ *
+ * @param[in] input0 First input tensor info. Data types supported: F16, F32
+ * @param[in] input1 Second input tensor info. Data type supported: same as @p input0
+ * @param[in] output Output tensor info. Data type supported: same as @p input0
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input0;
+ const ITensor *_input1;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
+#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H
+#define ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Template interface for the kernel to compute magnitude and phase */
+template <MagnitudeType mag_type, PhaseType phase_type>
+class NEMagnitudePhaseKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEMagnitudePhaseKernel";
+ }
+ /** Default constructor */
+ NEMagnitudePhaseKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete;
+ /** Default move constructor */
+ NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete;
+ /** Default move assignment operator */
+ NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default;
+ /** Destructor */
+ ~NEMagnitudePhaseKernel() = default;
+
+ /** Initialise the kernel's input, output.
+ *
+     * @note At least one of magnitude or phase must be set
+ *
+ * @param[in] gx Gradient X tensor. Data type supported: S16.
+ * @param[in] gy Gradient Y tensor. Data type supported: S16.
+ * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16.
+ * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8.
+ */
+ void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Function to perform magnitude on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void magnitude(const Window &window);
+ /** Function to perform phase on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void phase(const Window &window);
+ /** Function to perform magnitude and phase on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void magnitude_phase(const Window &window);
+
+private:
+ /** Common signature for all the specialised MagnitudePhase functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window);
+ /** MagnitudePhase function to use for the particular formats passed to configure() */
+ MagnitudePhaseFunctionPtr _func;
+ const ITensor *_gx; /**< Input gradient X */
+ const ITensor *_gy; /**< Input gradient Y */
+ ITensor *_magnitude; /**< Output - Magnitude */
+ ITensor *_phase; /**< Output - Phase */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H */
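For the L2-norm / signed-phase specialisation of the two template parameters, the per-pixel quantities are the familiar ones below; the L1 variant replaces the root of squares with |g_x| + |g_y|, and the phase angle is mapped into the U8 output range (standard definitions, not a statement about the exact rounding used):

@f[
 \text{mag} = \sqrt{g_x^2 + g_y^2}, \qquad \text{phase} = \operatorname{atan2}(g_y, g_x)
@f]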
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
+#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H
+#define ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the pooling layer kernel */
+class NEMaxUnpoolingLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEMaxUnpoolingLayerKernel";
+ }
+ /** Default constructor */
+ NEMaxUnpoolingLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMaxUnpoolingLayerKernel(const NEMaxUnpoolingLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMaxUnpoolingLayerKernel &operator=(const NEMaxUnpoolingLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEMaxUnpoolingLayerKernel(NEMaxUnpoolingLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEMaxUnpoolingLayerKernel &operator=(NEMaxUnpoolingLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEMaxUnpoolingLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @note Output shape must be equal to the shape of the original input to pool.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] indices Tensor containing the offset to store the input elements in the output tensor.
+ * @ref NEPoolingLayerKernel with indices should precede this function in order to
+ * properly reconstruct the output tensor.
+ * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ */
+ void configure(const ITensor *input, const ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEMaxUnpoolingLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * @param[in] indices Tensor info of the indices of the maximal values. Data type supported: U32.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+    /** Function to perform 2x2 max unpooling using the indices recorded by the preceding pooling kernel.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ */
+ template <typename T>
+ void unpooling2(const Window &window_input);
+
+ using UnpoolingFunction = void (NEMaxUnpoolingLayerKernel::*)(const Window &window);
+
+private:
+ UnpoolingFunction _func;
+ const ITensor *_input;
+ ITensor *_output;
+ const ITensor *_indices;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H */
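A tiny worked example, with illustrative values, of how the U32 indices drive the reconstruction: the preceding pooling kernel records, for each pooled value, the offset of the element it was taken from, and this kernel writes the value back to that offset in an otherwise zero output of the original shape:

@f[
 \left( \begin{array}{cc} 1 & 3 \\ 2 & 4 \end{array} \right)
 \xrightarrow{\text{max pool}} 4 \; (\text{index } 3)
 \xrightarrow{\text{max unpool}}
 \left( \begin{array}{cc} 0 & 0 \\ 0 & 4 \end{array} \right)
@f]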
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h"
+#include "src/core/NEON/kernels/NEMeanStdDevKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEMEANSTDDEVKERNEL_H
+#define ARM_COMPUTE_NEMEANSTDDEVKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+#include "support/Mutex.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
+class NEMeanStdDevKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEMeanStdDevKernel";
+ }
+ /** Default constructor */
+ NEMeanStdDevKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMeanStdDevKernel(NEMeanStdDevKernel &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = delete;
+ /** Default destructor */
+ ~NEMeanStdDevKernel() = default;
+
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] input Input image. Data type supported: U8.
+     * @param[out] mean               Output average pixel value.
+ * @param[out] global_sum Keeps global sum of pixel values.
+ * @param[out] stddev (Optional) Output standard deviation of pixel values.
+ * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
+ */
+ void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+ BorderSize border_size() const override;
+
+private:
+ const IImage *_input;
+ float *_mean;
+ float *_stddev;
+ uint64_t *_global_sum;
+ uint64_t *_global_sum_squared;
+ arm_compute::Mutex _mtx;
+ BorderSize _border_size;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEMEANSTDDEVKERNEL_H */
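Once every window has run, the two accumulators are combined into the final statistics in the usual sum / sum-of-squares way (stated here for convenience), with N the number of pixels processed:

@f[
 \mu = \frac{\text{global\_sum}}{N}, \qquad
 \sigma = \sqrt{\frac{\text{global\_sum\_squared}}{N} - \mu^2}
@f]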
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
+#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H
+#define ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include <arm_fp16.h>
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */
+class NEMeanStdDevNormalizationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEMeanStdDevNormalizationKernel";
+ }
+ /** Default constructor */
+ NEMeanStdDevNormalizationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevNormalizationKernel(const NEMeanStdDevNormalizationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevNormalizationKernel &operator=(const NEMeanStdDevNormalizationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEMeanStdDevNormalizationKernel(NEMeanStdDevNormalizationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEMeanStdDevNormalizationKernel &operator=(NEMeanStdDevNormalizationKernel &&) = default;
+ /** Default destructor */
+ ~NEMeanStdDevNormalizationKernel() = default;
+ /** Initialise the kernel's input and outputs.
+ *
+ * @note If the output tensor is a nullptr, the normalization will be performed in-place.
+ *
+ * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
+ * this tensor will store the result of the normalization. Data types supported: F16/F32.
+ * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
+ * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
+ */
+ void configure(ITensor *input, ITensor *output = nullptr, float epsilon = 1e-8f);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEMeanStdDevNormalizationKernel
+ *
+ * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
+ * this tensor will store the result of the normalization. Data types supported: F16/F32.
+ * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input
+ * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Normalizes the input with respect to mean and standard deviation.
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <typename ScalarType, int size>
+ void mean_stddev_normalization(const Window &window);
+
+ ITensor *_input;
+ ITensor *_output;
+ float _epsilon;
+
+ using MeanStdDevNormFunction = void (NEMeanStdDevNormalizationKernel::*)(const Window &window);
+
+ MeanStdDevNormFunction _func;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H */
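Spelled out, the per-row normalization applied to the 2D input is of the usual form below, with epsilon guarding the zero-standard-deviation case mentioned in configure(); exactly where epsilon is added is an implementation detail:

@f[
 y_{ij} = \frac{x_{ij} - \mu_i}{\sqrt{\sigma_i^2 + \epsilon}}
@f]

where mu_i and sigma_i^2 are the mean and variance of row i.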
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h"
+#include "src/core/NEON/kernels/NEMedian3x3Kernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEMEDIAN3x3KERNEL_H
+#define ARM_COMPUTE_NEMEDIAN3x3KERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Kernel to perform a median filter on a tensor */
+class NEMedian3x3Kernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEMedian3x3Kernel";
+ }
+ /** Default constructor */
+ NEMedian3x3Kernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMedian3x3Kernel(const NEMedian3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMedian3x3Kernel &operator=(const NEMedian3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEMedian3x3Kernel(NEMedian3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEMedian3x3Kernel &operator=(NEMedian3x3Kernel &&) = default;
+ /** Default destructor */
+ ~NEMedian3x3Kernel() = default;
+ /** Set the source, destination and border mode of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8
+ * @param[out] output Destination tensor. Data type supported: U8
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEMEDIAN3x3KERNEL_H */
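As a reminder of why a median filter is preferred to a mean filter for impulse noise: the output pixel is the 5th-smallest of the nine neighbourhood values, so a single outlier leaves it unchanged (illustrative numbers):

@f[
 \operatorname{median}\{1, 2, 3, 4, 5, 6, 7, 8, 255\} = 5
@f]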
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
+#include "src/core/NEON/kernels/NEMemsetKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEMEMSETKERNEL_H
+#define ARM_COMPUTE_NEMEMSETKERNEL_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for filling the planes of a tensor */
+class NEMemsetKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEMemsetKernel";
+ }
+ /** Default constructor */
+ NEMemsetKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMemsetKernel(const NEMemsetKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMemsetKernel &operator=(const NEMemsetKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEMemsetKernel(NEMemsetKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEMemsetKernel &operator=(NEMemsetKernel &&) = default;
+ /** Default destructor */
+ ~NEMemsetKernel() = default;
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in,out] tensor Input tensor to fill. Supported data types: All
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ */
+ void configure(ITensor *tensor, const PixelValue &constant_value);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ ITensor *_tensor;
+ PixelValue _constant_value;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEMEMSETKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h"
+#include "src/core/NEON/kernels/NEMinMaxLayerKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_NEMINMAXLAYERKERNEL_H
+#define ARM_COMPUTE_NEMINMAXLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+#include "support/Mutex.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform min max search on a 3D tensor. */
+class NEMinMaxLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEMinMaxLayerKernel";
+ }
+ /** Default constructor */
+ NEMinMaxLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = delete;
+ /** Default destructor */
+ ~NEMinMaxLayerKernel() = default;
+
+ /** Initialise the kernel's input and outputs.
+ *
+ * @note output[0] = minimum
+ * @note output[1] = maximum
+ *
+ * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32.
+ * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor.
+ * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32
+ */
+ void configure(const ITensor *input, ITensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEMinMaxLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: F32.
+ * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
+ * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+ /** Resets global minimum and maximum. */
+ void reset();
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ void update_min_max(float *out_ptr, float min, float max);
+ const ITensor *_input;
+ ITensor *_output;
+ arm_compute::Mutex _mtx;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEMINMAXLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h"
+#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H
+#define ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H
+
+#include "arm_compute/core/IArray.h"
+#include "src/core/NEON/INEKernel.h"
+#include "support/Mutex.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the kernel to perform min max search on an image. */
+class NEMinMaxKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEMinMaxKernel";
+ }
+ /** Default constructor */
+ NEMinMaxKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxKernel(const NEMinMaxKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMinMaxKernel(NEMinMaxKernel &&) = delete;
+ /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+ NEMinMaxKernel &operator=(NEMinMaxKernel &&) = delete;
+ /** Default destructor */
+ ~NEMinMaxKernel() = default;
+
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] input Input Image. Data types supported: U8/S16/F32.
+ * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
+ * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
+ */
+ void configure(const IImage *input, void *min, void *max);
+ /** Resets global minimum and maximum. */
+ void reset();
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Performs the min/max algorithm on U8 images on a given window.
+ *
+ * @param win The window to run the algorithm on.
+ */
+ void minmax_U8(Window win);
+ /** Performs the min/max algorithm on S16 images on a given window.
+ *
+ * @param win The window to run the algorithm on.
+ */
+ void minmax_S16(Window win);
+ /** Performs the min/max algorithm on F32 images on a given window.
+ *
+ * @param win The window to run the algorithm on.
+ */
+ void minmax_F32(Window win);
+ /** Common signature for all the specialised MinMax functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using MinMaxFunction = void (NEMinMaxKernel::*)(Window window);
+ /** MinMax function to use for the particular image types passed to configure() */
+ MinMaxFunction _func;
+ /** Helper to update min/max values **/
+ template <typename T>
+ void update_min_max(T min, T max);
+
+ const IImage *_input; /**< Input image. */
+ void *_min; /**< Minimum value. */
+ void *_max; /**< Maximum value. */
+ arm_compute::Mutex _mtx; /**< Mutex used for result reduction. */
+};
+
+/** Interface for the kernel to find min max locations of an image. */
+class NEMinMaxLocationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEMinMaxLocationKernel";
+ }
+ /** Default constructor */
+ NEMinMaxLocationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default;
+ /** Default destructor */
+ ~NEMinMaxLocationKernel() = default;
+
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] input Input Image. Data types supported: U8/S16/F32.
+ * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
+ * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
+ * @param[out] min_loc Array of minimum value locations.
+ * @param[out] max_loc Array of maximum value locations.
+ * @param[out] min_count Number of minimum value encounters.
+ * @param[out] max_count Number of maximum value encounters.
+ */
+ void configure(const IImage *input, void *min, void *max,
+ ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr,
+ uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ bool is_parallelisable() const override;
+
+private:
+ /** Performs the min/max location algorithm on T type images on a given window.
+ *
+ * @param win The window to run the algorithm on.
+ */
+ template <class T, bool count_min, bool count_max, bool loc_min, bool loc_max>
+ void minmax_loc(const Window &win);
+ /** Common signature for all the specialised MinMaxLoc functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window);
+ /** MinMaxLoc function to use for the particular image types passed to configure() */
+ MinMaxLocFunction _func;
+ /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */
+ template <class T, typename>
+ struct create_func_table;
+
+ const IImage *_input; /**< Input image. */
+ void *_min; /**< Minimum value. */
+ void *_max; /**< Maximum value. */
+ uint32_t *_min_count; /**< Count of minimum value encounters. */
+ uint32_t *_max_count; /**< Count of maximum value encounters. */
+ ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */
+ ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H */
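// A minimal usage sketch for the two kernels declared above (illustrative; the
// example_min_max_location name and the array capacity are arbitrary). It mirrors the two-stage
// approach of the NEMinMaxLocation runtime function: NEMinMaxKernel finds the values,
// NEMinMaxLocationKernel then collects their locations and counts. Library-internal code is
// assumed, since the header now lives under src/.
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h"

void example_min_max_location(arm_compute::Tensor &image) // U8 image, already allocated and filled
{
    using namespace arm_compute;

    int32_t      min_val = 0, max_val = 0;
    uint32_t     min_count = 0, max_count = 0;
    const size_t capacity = image.info()->dimension(0) * image.info()->dimension(1);

    Coordinates2DArray min_loc(capacity), max_loc(capacity);

    NEMinMaxKernel min_max;
    min_max.configure(&image, &min_val, &max_val);
    min_max.reset();
    NEScheduler::get().schedule(&min_max, Window::DimY);

    NEMinMaxLocationKernel min_max_loc;
    min_max_loc.configure(&image, &min_val, &max_val, &min_loc, &max_loc, &min_count, &max_count);
    NEScheduler::get().schedule(&min_max_loc, Window::DimY);
}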
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h"
+#include "src/core/NEON/kernels/NENonLinearFilterKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NENONLINEARFILTERKERNEL_H
+#define ARM_COMPUTE_NENONLINEARFILTERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to apply a non-linear filter */
+class NENonLinearFilterKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NENonLinearFilterKernel";
+ }
+ /** Default constructor */
+ NENonLinearFilterKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default;
+ /** Default destructor */
+ ~NENonLinearFilterKernel() = default;
+ /** Set the source, destination and border mode of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8
+ * @param[out] output Destination tensor. Data type supported: U8
+ * @param[in] function Non linear function to perform
+ * @param[in] mask_size Mask size. Supported sizes: 3, 5
+ * @param[in] pattern Mask pattern
+     * @param[in]  mask             The given mask. Used only if @p pattern is set to PATTERN_OTHER
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Fill mask with the corresponding given pattern.
+ *
+ * @param[in,out] mask Mask to be filled according to pattern
+ * @param[in] cols Columns (width) of mask
+ * @param[in] rows Rows (height) of mask
+ * @param[in] pattern Pattern to fill the mask according to
+ */
+ void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern);
+ /** Apply a median filter when given mask pattern is defined as box.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void median_filter_box(const Window &win);
+ /** Apply a min filter when given mask pattern is defined as box.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void min_filter_box(const Window &win);
+ /** Apply a max filter when given mask pattern is defined as box.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void max_filter_box(const Window &win);
+ /** Apply a median filter when given mask pattern is defined as cross.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void median_filter_cross(const Window &win);
+ /** Apply a min filter when given mask pattern is defined as cross.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void min_filter_cross(const Window &win);
+ /** Apply a max filter when given mask pattern is defined as cross.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void max_filter_cross(const Window &win);
+ /** Apply a median filter when given mask pattern is defined as disk.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void median_filter_disk(const Window &win);
+ /** Apply a min filter when given mask pattern is defined as disk.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void min_filter_disk(const Window &win);
+ /** Apply a max filter when given mask pattern is defined as disk.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void max_filter_disk(const Window &win);
+ /** Apply a non-linear filter when given mask has user-defined pattern.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void non_linear_filter_generic(const Window &win);
+
+private:
+ unsigned int _border_width;
+ const ITensor *_input;
+ ITensor *_output;
+ const uint8_t *_mask;
+ MatrixPattern _pattern;
+ NonLinearFilterFunction _function;
+ unsigned int _func_idx;
+ BorderSize _border_size;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NENONLINEARFILTERKERNEL_H */
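// A minimal usage sketch for the kernel declared above (illustrative; the example_median_filter
// name, shapes and mask size are arbitrary): a 3x3 median filter with a BOX pattern on a U8
// tensor. Border filling, which the NENonLinearFilter runtime function sets up with
// NEFillBorderKernel, is omitted here, so border_undefined is passed as true.
#include <array>

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NENonLinearFilterKernel.h"

void example_median_filter()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

    std::array<uint8_t, 9> mask{}; // only consulted when the pattern is PATTERN_OTHER
    mask.fill(255);

    NENonLinearFilterKernel filter;
    filter.configure(&src, &dst, NonLinearFilterFunction::MEDIAN, 3, MatrixPattern::BOX, mask.data(), true);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, then:
    NEScheduler::get().schedule(&filter, Window::DimY);
}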
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H
+#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON
+ *
+ * @note Used by @ref NEFastCorners and @ref NEHarrisCorners
+ */
+class NENonMaximaSuppression3x3Kernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NENonMaximaSuppression3x3Kernel";
+ }
+ /** Default constructor */
+ NENonMaximaSuppression3x3Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default;
+ /** Default destructor */
+ ~NENonMaximaSuppression3x3Kernel() = default;
+
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8/F32
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+protected:
+ /** Common signature for all the specialised non-maxima suppression 3x3 functions
+ *
+ * @param[in] input_ptr Pointer to the input tensor.
+ * @param[out] output_ptr Pointer to the output tensor
+ * @param[in] input_stride Stride of the input tensor
+ */
+ using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride);
+
+ NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */
+ const ITensor *_input; /**< Source tensor */
+ ITensor *_output; /**< Destination tensor */
+};
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32
+ */
+class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel
+{
+public:
+ const char *name() const override
+ {
+ return "NENonMaximaSuppression3x3FP16Kernel";
+ }
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+};
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */
+using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H */
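// A minimal usage sketch for the kernel declared above (illustrative; the
// example_non_maxima_suppression name and shapes are arbitrary), run on an F32 map. Border
// filling is omitted for brevity (NEFastCorners and NEHarrisCorners show the complete setup),
// so border_undefined is passed as true.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"

void example_non_maxima_suppression()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));

    NENonMaximaSuppression3x3Kernel nms;
    nms.configure(&src, &dst, true /* border_undefined */);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, then:
    NEScheduler::get().schedule(&nms, Window::DimY);
}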
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h"
+#include "src/core/NEON/kernels/NENormalizationLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the normalization layer kernel.
+ */
+class NENormalizationLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NENormalizationLayerKernel";
+ }
+ /** Default constructor */
+ NENormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default;
+ /** Default move assignment operator */
+ NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~NENormalizationLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC.
+     * @param[in]  input_squared Source tensor with each element squared. 3 lower dims represent a single input with dimensions [width, height, IFM].
+ * Data type and layout supported: same as @p input.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+ */
+ void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC.
+     * @param[in]  input_squared Source tensor with each element squared. 3 lower dims represent a single input with dimensions [width, height, IFM].
+ * Data type and layout supported: same as @p input.
+ * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Function to perform normalization depending on the given template
+     * dimension. The last template parameter specifies whether the
+ * normalization has to be 1D or 2D.
+ *
+ * @note Only supported normalizations are:
+ * - 1D over X or Z
+ * - 2D over X and Y
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm>
+ void normalize_float(const Window &window);
+
+ /** Common signature for all the specialised normalization functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window);
+
+private:
+ NormalizationFunction _func;
+ const ITensor *_input;
+ const ITensor *_input_squared;
+ ITensor *_output;
+ NormalizationLayerInfo _norm_info;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H */
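// A minimal usage sketch for the kernel declared above (illustrative; the example_normalization
// name is arbitrary and the tensors are assumed to be configured and allocated by the caller).
// The squared input is normally produced by the NENormalizationLayer runtime function through a
// pixel-wise multiplication; here it is taken as given.
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NENormalizationLayerKernel.h"

void example_normalization(arm_compute::Tensor &src, arm_compute::Tensor &src_squared, arm_compute::Tensor &dst)
{
    using namespace arm_compute;

    const NormalizationLayerInfo norm_info(NormType::CROSS_MAP, 5);

    // validate() returns a Status; check it before configuring.
    ARM_COMPUTE_ERROR_THROW_ON(
        NENormalizationLayerKernel::validate(src.info(), src_squared.info(), dst.info(), norm_info));

    NENormalizationLayerKernel norm;
    norm.configure(&src, &src_squared, &dst, norm_info);
    NEScheduler::get().schedule(&norm, Window::DimY);
}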
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEPADLAYERKERNEL_H
+#define ARM_COMPUTE_NEPADLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to add padding to a tensor
+ *
+ * Add padding given padding information
+ */
+class NEPadLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEPadLayerKernel";
+ }
+ /** Default constructor */
+ NEPadLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPadLayerKernel(const NEPadLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPadLayerKernel &operator=(const NEPadLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEPadLayerKernel(NEPadLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEPadLayerKernel &operator=(NEPadLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEPadLayerKernel() = default;
+
+ /** Initialize the function
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
+ * specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT.
+ * Only CONSTANT padding mode is currently supported
+ */
+ void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayerKernel.
+ *
+ * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Output tensor info. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
+ * specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT.
+ * Only CONSTANT padding mode is currently supported
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run the padding function with constant padding
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T>
+ void run_pad_constant(const Window &window);
+
+ /** Function to run the padding function with constant padding for 3D input and 1D, 2D, 3D padding
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ void run_pad_constant_uint8_3Dinput_3Dpad(const Window &window);
+
+    /** Common signature for all the specialised padding functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using PadFunctionPtr = void (NEPadLayerKernel::*)(const Window &window);
+
+ PadFunctionPtr _func;
+ const ITensor *_input;
+ ITensor *_output;
+ PaddingList _padding;
+ PixelValue _constant_value;
+ PaddingMode _mode;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEPADLAYERKERNEL_H */
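// A minimal usage sketch for the kernel declared above (illustrative; the example_pad name and
// shapes are arbitrary): pad an 8x8 F32 tensor by one element on each side of the first two
// dimensions, using the default constant value and the default CONSTANT mode.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEPadLayerKernel.h"

void example_pad()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(10U, 10U), 1, DataType::F32)); // 1 + 8 + 1 in x and y

    const PaddingList padding = { { 1, 1 }, { 1, 1 } }; // (front, end) padding per dimension

    NEPadLayerKernel pad;
    pad.configure(&src, &dst, padding);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, then:
    NEScheduler::get().schedule(&pad, Window::DimY);
}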
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
+#include "src/core/NEON/kernels/NEPermuteKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
// Input window
Window window_in = window;
- // we only support these two configs in arm_compute/core/NEON/kernels/convolution/common/shims.hpp, for all others
+ // we only support these two configs in src/core/NEON/kernels/convolution/common/shims.hpp, for all others
// we have to fall back to C++
if((input_layout == DataLayout::NCHW && _perm == PermutationVector{ 2U, 0U, 1U }) || (input_layout == DataLayout::NHWC && _perm == PermutationVector{ 1U, 2U, 0U }))
{
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEPERMUTEKERNEL_H
+#define ARM_COMPUTE_NEPERMUTEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** NEON kernel to perform tensor permutation.
+ *
+ * Permutes given a permutation vector
+ */
+class NEPermuteKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEPermuteKernel";
+ }
+ /** Default constructor */
+ NEPermuteKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPermuteKernel(const NEPermuteKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPermuteKernel &operator=(const NEPermuteKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEPermuteKernel(NEPermuteKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEPermuteKernel &operator=(NEPermuteKernel &&) = default;
+ /** Default destructor */
+ ~NEPermuteKernel() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] input The input tensor to permute. Data types supported: All
+ * @param[out] output The output tensor. Data types supported: Same as @p input
+ * @param[in] perm Permutation vector
+ */
+ void configure(const ITensor *input, ITensor *output, const PermutationVector &perm);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEPermuteKernel
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] input The input tensor to permute. Data types supported: All
+ * @param[in] output The output tensor. Data types supported: Same as @p input
+ * @param[in] perm Permutation vector
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Template function to run the permute
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T>
+ void run_permute(const Window &window);
+
+ /** Common signature for all the specialised permute functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using PermuteFunctionPtr = void (NEPermuteKernel::*)(const Window &window);
+
+ PermuteFunctionPtr _func;
+ const ITensor *_input;
+ ITensor *_output;
+ PermutationVector _perm;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEPERMUTEKERNEL_H */
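// A minimal usage sketch for the kernel declared above (illustrative; the example_permute name
// and shapes are arbitrary): an NCHW -> NHWC permutation using PermutationVector(2U, 0U, 1U),
// the same vector the kernel's fast path handles for NCHW input.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEPermuteKernel.h"

void example_permute()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 24U, 3U), 1, DataType::F32)); // W, H, C (NCHW)
    dst.allocator()->init(TensorInfo(TensorShape(3U, 32U, 24U), 1, DataType::F32)); // C, W, H (NHWC)

    NEPermuteKernel permute;
    permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, then:
    NEScheduler::get().schedule(&permute, Window::DimY);
}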
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
+#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "src/core/CPP/Validate.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H
+#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform pixel-wise multiplication between two tensors */
+class NEPixelWiseMultiplicationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEPixelWiseMultiplicationKernel";
+ }
+ /** Default constructor */
+ NEPixelWiseMultiplicationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default;
+ /** Default destructor */
+ ~NEPixelWiseMultiplicationKernel() = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * Support: Broadcast? Scale=1/255?
+ * - (U8,U8) -> U8, S16 N Y
+ * - (U8,S16) -> S16 N Y
+ * - (S16,U8) -> S16 N Y
+ * - (S16,S16) -> S16 N Y
+ * - (S32,S32) -> S32 Y N
+ * - (F16,F16) -> F16 N Y
+ * - (F32,F32) -> F32 Y Y
+ * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y
+ * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y
+ *
+ * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
+ * For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
+ *
+ * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
+ * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
+ * @param[in] scale Scale to apply after multiplication.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+ * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255
+ * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype
+ * @param[in] rounding_policy Rounding policy.
+ */
+ void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ * Support: Broadcast? Scale=1/255?
+ * - (U8,U8) -> U8, S16 N Y
+ * - (U8,S16) -> S16 N Y
+ * - (S16,U8) -> S16 N Y
+ * - (S16,S16) -> S16 N Y
+ * - (S32,S32) -> S32 Y N
+ * - (F16,F16) -> F16 N Y
+ * - (F32,F32) -> F32 Y Y
+ * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y
+ * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y
+ *
+ * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
+ * For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
+ *
+ * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
+ * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
+ * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32
+ * @param[in] scale Scale to apply after multiplication.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+ * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255
+ * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype
+ * @param[in] rounding_policy Rounding policy.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
+
+ // Inherited methods overridden
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised multiplication functions with integer scaling factor
+ *
+ * @param[in] in1 Input1 tensor object.
+ * @param[in] in2 Input2 tensor object.
+ * @param[out] out Output tensor object.
+ * @param[in] window Region on which to execute the kernel
+ * @param[in] scale Integer scale factor.
+ */
+ using MulFunctionInt = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, int scale);
+ /** Common signature for all the specialised multiplication functions with float scaling factor
+ *
+ * @param[in] in1 Input1 tensor object.
+ * @param[in] in2 Input2 tensor object.
+ * @param[out] out Output tensor object.
+ * @param[in] window Region on which to execute the kernel
+ * @param[in] scale Float scale factor.
+ */
+ using MulFunctionFloat = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale);
+ /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor
+ *
+ * @param[in] in1 Input1 tensor object.
+ * @param[in] in2 Input2 tensor object.
+ * @param[out] out Output tensor object.
+ * @param[in] window Region on which to execute the kernel
+ * @param[in] scale Float scale factor.
+ *
+ */
+ using MulFunctionQuantized = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale);
+
+ MulFunctionFloat *_func_float;
+ MulFunctionInt *_func_int;
+ MulFunctionQuantized *_func_quantized;
+
+private:
+ float _scale;
+ int _scale_exponent;
+};
+
+/** Interface for the complex pixelwise multiplication kernel. */
+class NEComplexPixelWiseMultiplicationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEComplexPixelWiseMultiplicationKernel";
+ }
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor).
+ * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ */
+ void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplicationKernel
+ *
+ * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor).
+ * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+};
+
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H */
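// A minimal usage sketch (illustrative; the example_multiply name and shapes are arbitrary)
// using the NEPixelWiseMultiplication runtime function, which wraps the kernel above; the
// kernel itself is configured with ITensorInfo pointers and executed through run_op() with an
// ITensorPack. F32 inputs with scale 1 and TO_ZERO rounding form one of the valid
// configurations listed in configure().
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/Tensor.h"

void example_multiply()
{
    using namespace arm_compute;

    Tensor a, b, out;
    a.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    b.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

    NEPixelWiseMultiplication mul;
    mul.configure(&a, &b, &out, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();
    // ... fill a and b, then:
    mul.run();
}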
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
+#include "src/core/NEON/kernels/NEPoolingLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H
+#define ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the pooling layer kernel */
+class NEPoolingLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEPoolingLayerKernel";
+ }
+ /** Default constructor */
+ NEPoolingLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEPoolingLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+     * @note F16 is supported for pool sizes 2 and 3 only
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
+ */
+ void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel
+ *
+     * @note F16 is supported for pool sizes 2 and 3 only
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Function to perform 2x2 pooling.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ void pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ */
+ void pooling2_f32_nhwc_maxpool_indices(const Window &window_input, const Window &window);
+ /** Function to perform MxN pooling for 32-bit floating point values.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ void poolingMxN_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Function to perform MxN pooling for 32-bit floating point values (NHWC).
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ void poolingMxN_f32_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Function to perform 7x7 pooling.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ void pooling7_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Function to perform 3x3 pooling.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ void pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Function to perform 2x2 pooling for float16_t.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ void pooling2_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Function to perform 2x2 pooling and compute the pooling indices for FP32/FP16. The indices can be used for max unpool.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ */
+ template <typename T>
+ void pooling2_nchw_maxpool_indices(const Window &window_input, const Window &window);
+ /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ */
+ void pooling2_f16_nhwc_maxpool_indices(const Window &window_input, const Window &window);
+ /** Function to perform 3x3 pooling.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ void pooling3_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Function to perform MxN pooling for 16-bit floating point values.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ void poolingMxN_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Function to perform MxN pooling for 16-bit floating point values. (NHWC)
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ void poolingMxN_f16_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Template function to perform 2x2 pooling for 8bit quantized fixed point. (NCHW)
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ template <typename T>
+ void pooling2_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Template function to perform 3x3 pooling for 8bit quantized fixed point. (NCHW)
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ template <typename T>
+ void pooling3_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Template function to perform MxN pooling for 8-bit quantized. (NCHW)
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ template <typename T>
+ void poolingMxN_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Template function to perform MxN pooling for 8-bit quantized. (NHWC)
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ template <typename T>
+ void poolingMxN_q8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Common signature for all the specialised Pooling functions
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ * @param[in] pooling_type Pooling operation to be computed.
+ * @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
+ */
+ using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding);
+
+private:
+ PoolingFunction _func;
+ const ITensor *_input;
+ ITensor *_output;
+ ITensor *_indices;
+ PoolingLayerInfo _pool_info;
+ DataLayout _data_layout;
+ unsigned int _num_elems_processed_per_iteration;
+ BorderSize _border_size;
+ bool _is_square;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H */
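// A minimal usage sketch (illustrative; the example_pooling name and shapes are arbitrary)
// using the NEPoolingLayer runtime function, which configures the kernel above together with
// the border handling it needs in NCHW. The PoolingLayerInfo constructor taking a DataLayout
// is assumed to be the one available in this version of the library.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/Tensor.h"

void example_pooling()
{
    using namespace arm_compute;

    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32)); // W, H, C
    dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 16U), 1, DataType::F32));

    NEPoolingLayer pool;
    pool.configure(&src, &dst, PoolingLayerInfo(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0)));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, then:
    pool.run();
}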
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h"
+#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H
+#define ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to calculate prior boxes */
+class NEPriorBoxLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEPriorBoxLayerKernel";
+ }
+ /** Default constructor */
+ NEPriorBoxLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPriorBoxLayerKernel(const NEPriorBoxLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPriorBoxLayerKernel &operator=(const NEPriorBoxLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEPriorBoxLayerKernel(NEPriorBoxLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEPriorBoxLayerKernel &operator=(NEPriorBoxLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEPriorBoxLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
+ * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1
+ * @param[in] info Prior box layer info.
+ */
+ void configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEPriorBoxLayerKernel
+ *
+ * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1
+ * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1
+ * @param[in] info Prior box layer info.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Stores the coordinates of the calculated prior boxes.
+ *
+ * @param[out] out Output pointer.
+ * @param[in] offset Output offset to write to.
+ * @param[in] center_x Center pixel value on x-axis.
+ * @param[in] center_y Center pixel value on y-axis.
+ * @param[in] box_width Prior box width.
+ * @param[in] box_height Prior box height.
+ * @param[in] width Input width.
+ * @param[in] height Input height.
+ */
+ void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height);
+ /** Function to calculate prior boxes.
+ *
+ * @param[in] window Input region on which to execute the kernel.
+ */
+ void calculate_prior_boxes(const Window &window);
+
+ const ITensor *_input1;
+ const ITensor *_input2;
+ ITensor *_output;
+ PriorBoxLayerInfo _info;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H */
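
A rough usage sketch for the wiring above. The PriorBoxLayerInfo arguments and the assumption that configure() auto-initializes the empty output metadata are not taken from this patch and should be treated as illustrative; since the header now lives under src/, code like this would sit inside the library or its tests.

// Hypothetical wiring sketch: shapes, PriorBoxLayerInfo arguments and the
// scheduling dimension are assumptions, not part of the patch.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h"

using namespace arm_compute;

int main()
{
    // Feature map (input1) and image (input2), both F32 / NCHW.
    Tensor feat, img, out;
    feat.allocator()->init(TensorInfo(TensorShape(16U, 16U, 256U), 1, DataType::F32));
    img.allocator()->init(TensorInfo(TensorShape(128U, 128U, 3U), 1, DataType::F32));

    // Assumed prior box parameters: one min size, four variances, 0.5 offset.
    const PriorBoxLayerInfo info({ 30.f }, { 0.1f, 0.1f, 0.2f, 0.2f }, 0.5f);

    // Output dimensions ([W * H * num_priors * 4, 2]) are derived at configure time.
    NEPriorBoxLayerKernel kernel;
    kernel.configure(&feat, &img, &out, info);

    feat.allocator()->allocate();
    img.allocator()->allocate();
    out.allocator()->allocate();

    NEScheduler::get().schedule(&kernel, Window::DimY);
    return 0;
}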
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
+#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H
+#define ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+#include <functional>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform layer normalization */
+class NEQLSTMLayerNormalizationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEQLSTMLayerNormalizationKernel";
+ }
+ /** Default constructor */
+ NEQLSTMLayerNormalizationKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQLSTMLayerNormalizationKernel(const NEQLSTMLayerNormalizationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQLSTMLayerNormalizationKernel &operator=(const NEQLSTMLayerNormalizationKernel &) = delete;
+ /** Default Move Constructor. */
+ NEQLSTMLayerNormalizationKernel(NEQLSTMLayerNormalizationKernel &&) = default;
+ /** Default move assignment operator */
+ NEQLSTMLayerNormalizationKernel &operator=(NEQLSTMLayerNormalizationKernel &&) = default;
+ /** Default destructor */
+ ~NEQLSTMLayerNormalizationKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QSYMM16.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] weight Weight tensor. Data types supported: Same as @p input.
+ * @param[in] bias Bias tensor. Data types supported: S32
+ */
+ void configure(const ITensor *input, ITensor *output, const ITensor *weight, const ITensor *bias);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayerNormalizationKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QSYMM16.
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * @param[in] weight Weight tensor info. Data types supported: Same as @p input.
+ * @param[in] bias Bias tensor info. Data types supported: S32
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias);
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ // constants
+ static constexpr uint32_t max_input_dimension{ 2 }; /**< The maximum input dimension supported */
+ static constexpr uint32_t max_weight_dimension{ 1 }; /**< The maximum weight dimension supported */
+ static constexpr uint32_t max_bias_dimension{ 1 }; /**< The maximum bias dimension supported */
+ static constexpr uint32_t vector_size_byte{ 16 }; /**< Computation vector size in byte */
+
+ using ComputeFuncType = std::function<void(NEQLSTMLayerNormalizationKernel &)>;
+
+ ComputeFuncType _fn{}; /**< Function pointer to computation function */
+
+ const ITensor *_input{ nullptr };  /**< Input tensor */
+ const ITensor *_weight{ nullptr }; /**< Weight tensor */
+ const ITensor *_bias{ nullptr };   /**< Bias tensor */
+ ITensor *_output{ nullptr }; /**< Output tensor */
+
+ int32_t _output_multiplier{}; /**< Multiplier for output values */
+ int32_t _output_shift{}; /**< Shift value for output values */
+
+ int32_t _window_start_x{}; /**< The beginning of x-axis iteration */
+ int32_t _window_end_x{}; /**< The end of x-axis iteration */
+ int32_t _window_step_x{}; /**< The size of x-axis iteration's step */
+
+ Window _inout_window{}; /**< Window for input and output tensor */
+ Window _weight_window{}; /**< Window for weight and bias tensor */
+
+ /** Function to configure the initial output window used by the computation
+ *
+ * @param[in] target Target tensor to use for the output window
+ *
+ * @return configured window
+ */
+ Window configure_window(ITensor *target);
+ /** Function to compute layer normalization for data type QSYMM16 */
+ void compute_qsymm16();
+ /** Function to compute the sum and the sum of squares of the given input
+ *
+ * @param[in] input_ptr Pointer to the input array
+ *
+ * @return a pair containing the sum and the sum of squares of the input values
+ */
+ std::pair<int64_t, int64_t> sum_qsymm16(const int16_t *input_ptr);
+ /** Function to normalize values using computed mean and standard deviation
+ *
+ * @param[in] input_ptr Pointer to input array
+ * @param[out] output_ptr Pointer to output array
+ * @param[in] weight_ptr Pointer to weight array
+ * @param[in] bias_ptr Pointer to bias array
+ * @param[in] mean Mean value
+ * @param[in] inv_std_mul Quantized multiplier for standard deviation
+ * @param[in] inv_std_shift Shift for standard deviation
+ *
+ */
+ void normalize_qasymm16(const int16_t *input_ptr,
+ int16_t *output_ptr,
+ const int16_t *weight_ptr,
+ const int32_t *bias_ptr,
+ int32_t mean, int32_t inv_std_mul, int32_t inv_std_shift);
+ /** Function to compute output quantization information */
+ QuantizationInfo compute_output_qinfo();
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H */
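
A minimal wiring sketch for this kernel, assuming the usual Tensor/NEScheduler workflow from the library's runtime; the shapes and quantization scales below are illustrative only, and the argument order of configure() (input, output, weight, bias) is the one declared above.

// Illustrative sketch: QSYMM16 input/weight, S32 bias, at most 2-D input.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"

using namespace arm_compute;

int main()
{
    constexpr unsigned int num_units = 32U; // length of the normalized axis
    constexpr unsigned int batches   = 2U;  // the kernel accepts at most 2-D input

    Tensor input, output, weight, bias;
    input.allocator()->init(TensorInfo(TensorShape(num_units, batches), 1, DataType::QSYMM16, QuantizationInfo(1.f / 4096.f)));
    output.allocator()->init(TensorInfo(TensorShape(num_units, batches), 1, DataType::QSYMM16, QuantizationInfo(1.f / 4096.f)));
    weight.allocator()->init(TensorInfo(TensorShape(num_units), 1, DataType::QSYMM16, QuantizationInfo(1.f / 2048.f)));
    bias.allocator()->init(TensorInfo(TensorShape(num_units), 1, DataType::S32));

    NEQLSTMLayerNormalizationKernel norm;
    ARM_COMPUTE_ERROR_THROW_ON(NEQLSTMLayerNormalizationKernel::validate(input.info(), output.info(), weight.info(), bias.info()));
    norm.configure(&input, &output, &weight, &bias); // note the (input, output, weight, bias) order

    for(Tensor *t : { &input, &output, &weight, &bias })
    {
        t->allocator()->allocate();
    }

    NEScheduler::get().schedule(&norm, Window::DimY);
    return 0;
}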
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the quantization layer kernel.
+ *
+ * @note The implementation supports only 3D input tensors
+ *
+ */
+class NEQuantizationLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEQuantizationLayerKernel";
+ }
+ /** Default constructor */
+ NEQuantizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQuantizationLayerKernel(const NEQuantizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default;
+ /** Default move assignment operator */
+ NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEQuantizationLayerKernel() = default;
+ /** Set the input, output.
+ *
+ * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+ * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @note Output auto initialization is not supported by this kernel
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+ * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window);
+ /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <typename TIn, typename TOut>
+ void run_quantize_qasymm8(const Window &window);
+ /** Function to apply QASYMM16 quantization on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <typename T>
+ void run_quantize_qasymm16(const Window &window);
+
+ const ITensor *_input;
+ ITensor *_output;
+
+ QuantizationFunctionExecutorPtr _func;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H */
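
A short usage sketch, assuming the usual Tensor/NEScheduler workflow: quantize an F32 tensor to QASYMM8. Because output auto-initialization is not supported by this kernel (see the note above), the caller initializes the destination, including its QuantizationInfo; the shapes and scale chosen here are illustrative.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));
    // The output must be fully initialized by the caller, including its QuantizationInfo.
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 0)));

    NEQuantizationLayerKernel quantize;
    quantize.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    NEScheduler::get().schedule(&quantize, Window::DimY);
    return 0;
}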
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h"
+#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H
+#define ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the RoIAlign kernel.
+ */
+class NEROIAlignLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEROIAlignLayerKernel";
+ }
+
+ /** Constructor */
+ NEROIAlignLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEROIAlignLayerKernel(const NEROIAlignLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEROIAlignLayerKernel &operator=(const NEROIAlignLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ NEROIAlignLayerKernel(NEROIAlignLayerKernel &&) = default;
+ /** Default move assignment operator. */
+ NEROIAlignLayerKernel &operator=(NEROIAlignLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEROIAlignLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+ * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
+ * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ */
+ void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8,
+ * otherwise same as @p input
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ template <DataLayout data_layout, typename input_data_type, typename roi_data_type = input_data_type>
+ void internal_run(const Window &window, const ThreadInfo &info);
+
+ const ITensor *_input;
+ ITensor *_output;
+ const ITensor *_rois;
+ ROIPoolingLayerInfo _pool_info;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H*/
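
A usage sketch, assuming an F32 NCHW input and the usual Tensor/NEScheduler workflow; the ROIPoolingLayerInfo values (pooled size, spatial scale, sampling ratio) and the output shape written out below are illustrative assumptions that follow the notes above.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h"

using namespace arm_compute;

int main()
{
    constexpr unsigned int num_rois = 4U;

    Tensor input, rois, output;
    input.allocator()->init(TensorInfo(TensorShape(64U, 64U, 256U), 1, DataType::F32));
    // For a non-quantized input the ROIs use the same data type: [5, N] = [batch_id, x1, y1, x2, y2] per ROI.
    rois.allocator()->init(TensorInfo(TensorShape(5U, num_rois), 1, DataType::F32));
    // Per the notes above: x/y = pooled width/height, z = input depth, 4th dimension = number of ROIs.
    output.allocator()->init(TensorInfo(TensorShape(7U, 7U, 256U, num_rois), 1, DataType::F32));

    const ROIPoolingLayerInfo pool_info(7U, 7U, 1.f / 16.f /* spatial scale */, 2U /* sampling ratio */);

    NEROIAlignLayerKernel roi_align;
    roi_align.configure(&input, &rois, &output, pool_info);

    input.allocator()->allocate();
    rois.allocator()->allocate();
    output.allocator()->allocate();
    // ... fill input and rois before scheduling ...

    NEScheduler::get().schedule(&roi_align, Window::DimX);
    return 0;
}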
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
+#include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H
+#define ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+#include "arm_compute/core/IArray.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the ROI pooling layer kernel */
+class NEROIPoolingLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEROIPoolingLayerKernel";
+ }
+ /** Default constructor */
+ NEROIPoolingLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEROIPoolingLayerKernel(const NEROIPoolingLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEROIPoolingLayerKernel &operator=(const NEROIPoolingLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEROIPoolingLayerKernel(NEROIPoolingLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEROIPoolingLayerKernel &operator=(NEROIPoolingLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEROIPoolingLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F32.
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+ * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor.
+ */
+ void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ const ITensor *_rois;
+ ITensor *_output;
+ ROIPoolingLayerInfo _pool_info;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H */
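
The wiring for this kernel mirrors the ROI align sketch shown after NEROIAlignLayerKernel, with two differences visible in the declaration above: the ROIs tensor is U16 rather than matching the input type, and there is no static validate() entry point, so configure() is the only checkpoint. A separate example is omitted here to avoid repeating the previous one.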
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NERangeKernel.h"
+#include "src/core/NEON/kernels/NERangeKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NERANGEKERNEL_H
+#define ARM_COMPUTE_NERANGEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Kernel class for Range
+ *
+ * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments
+ * of 'step' up to but not including 'end'.
+ */
+class NERangeKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NERangeKernel";
+ }
+ /** Default constructor */
+ NERangeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERangeKernel(const NERangeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERangeKernel &operator=(const NERangeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NERangeKernel(NERangeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NERangeKernel &operator=(NERangeKernel &&) = default;
+ /** Default destructor */
+ ~NERangeKernel() = default;
+ /** Initialize the kernel's output tensor, start, end and step of the sequence.
+ *
+ * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] start The starting value of the sequence.
+ * @param[in] end The ending (not including) value of the sequence.
+ * @param[in] step The gap between each pair of values in the sequence.
+ */
+ void configure(ITensor *output, float start, float end, float step);
+ /** Static function to check if given info will lead to a valid configuration of @ref NERangeKernel
+ *
+ * @param[in] output Output tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] start The starting value of the sequence.
+ * @param[in] end The ending (not including) value of the sequence.
+ * @param[in] step The gap between each pair of values in the sequence.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *output, float start, float end, float step);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ using RangeFunction = void(ITensor *output, float start, float step, const Window &window);
+
+ RangeFunction *_func; /**< Range function to be called */
+ float _start; /**< Start of sequence */
+ float _end; /**< End of sequence */
+ float _step; /**< Increment/step value */
+ ITensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NERANGEKERNEL_H */
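
A usage sketch, assuming the usual Tensor/NEScheduler workflow: generate the sequence 0, 0.5, ..., 9.5 into a 1-D F32 tensor. Following the class description, the number of output elements is ceil((end - start) / step), which the caller uses to size the output.

#include <cmath>

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NERangeKernel.h"

using namespace arm_compute;

int main()
{
    const float start = 0.f, end = 10.f, step = 0.5f;
    // Number of elements: ceil((end - start) / step), here 20.
    const auto num_elems = static_cast<unsigned int>(std::ceil((end - start) / step));

    Tensor out;
    out.allocator()->init(TensorInfo(TensorShape(num_elems), 1, DataType::F32));

    NERangeKernel range;
    ARM_COMPUTE_ERROR_THROW_ON(NERangeKernel::validate(out.info(), start, end, step));
    range.configure(&out, start, end, step);

    out.allocator()->allocate();
    NEScheduler::get().schedule(&range, Window::DimX);
    return 0;
}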
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H
+#define ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a reduction operation
+ *
+ * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized
+ * output tensor is signed 32-bit integer (S32). It is the user's responsibility
+ * to check that the results do not overflow because the indices are computed
+ * in unsigned 32-bit (U32).
+ */
+class NEReductionOperationKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEReductionOperationKernel";
+ }
+ /** Default constructor */
+ NEReductionOperationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReductionOperationKernel(const NEReductionOperationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReductionOperationKernel &operator=(const NEReductionOperationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEReductionOperationKernel(NEReductionOperationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEReductionOperationKernel &operator=(NEReductionOperationKernel &&) = default;
+ /** Default destructor */
+ ~NEReductionOperationKernel() = default;
+
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW.
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis: 0
+ * @param[in] op Reduction operation to perform.
+ */
+ void configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel.
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW.
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis: 0
+ * @param[in] op Reduction operation to perform.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ unsigned int _reduction_axis;
+ ReductionOperation _op;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H */
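
A usage sketch, assuming the usual Tensor/NEScheduler workflow: sum-reduce an F32 tensor along axis 0. The shapes are illustrative; per the doc above, the output keeps the same rank as the input with the reduced axis collapsed to one element.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEReductionOperationKernel.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F32));
    // Same rank as the input, with the reduced axis collapsed to 1.
    dst.allocator()->init(TensorInfo(TensorShape(1U, 16U), 1, DataType::F32));

    NEReductionOperationKernel reduce;
    ARM_COMPUTE_ERROR_THROW_ON(NEReductionOperationKernel::validate(src.info(), dst.info(), 0U, ReductionOperation::SUM));
    reduce.configure(&src, &dst, 0U, ReductionOperation::SUM);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    NEScheduler::get().schedule(&reduce, Window::DimY);
    return 0;
}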
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NERemapKernel.h"
+#include "src/core/NEON/kernels/NERemapKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEREMAPKERNEL_H
+#define ARM_COMPUTE_NEREMAPKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a remap on a tensor */
+class NERemapKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NERemapKernel";
+ }
+ /** Default constructor */
+ NERemapKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERemapKernel(const NERemapKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERemapKernel &operator=(const NERemapKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NERemapKernel(NERemapKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NERemapKernel &operator=(NERemapKernel &&) = default;
+ /** Default destructor */
+ ~NERemapKernel() = default;
+
+ /** Initialize the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[in] map_x Map for X coordinates. Data type supported: F32.
+ * @param[in] map_y Map for Y coordinates. Data type supported: F32.
+ * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ /** function to perform nearest interpolation on the given window */
+ void remap_nearest(const Window &window);
+ /** function to perform bilinear interpolation on the given window */
+ void remap_bilinear(const Window &window);
+ /** Remap function to use for the particular interpolation type passed to configure() */
+ void (NERemapKernel::*_func)(const Window &window);
+
+ const ITensor *_input; /**< Input image */
+ ITensor *_output; /**< Output image */
+ const ITensor *_map_x; /**< Input remap x coordinates */
+ const ITensor *_map_y; /**< Input remap y coordinates */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */
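
A wiring sketch, assuming the usual Tensor workflow: remap a U8 image through F32 coordinate maps with nearest-neighbour interpolation. Only configuration is shown; real code would fill the maps with source coordinates and, since the kernel reports a border_size(), also handle borders before running it.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NERemapKernel.h"

using namespace arm_compute;

int main()
{
    Tensor src, map_x, map_y, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
    map_x.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
    map_y.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

    NERemapKernel remap;
    remap.configure(&src, &map_x, &map_y, &dst, InterpolationPolicy::NEAREST_NEIGHBOR);
    return 0;
}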
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h"
+#include "src/core/NEON/kernels/NEReorgLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEREORGLAYERKERNEL_H
+#define ARM_COMPUTE_NEREORGLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the kernel to perform tensor re-organization */
+class NEReorgLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEReorgLayerKernel";
+ }
+ /** Default constructor */
+ NEReorgLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReorgLayerKernel(const NEReorgLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReorgLayerKernel &operator=(const NEReorgLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ NEReorgLayerKernel(NEReorgLayerKernel &&) = default;
+ /** Default move assignment operator */
+ NEReorgLayerKernel &operator=(NEReorgLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEReorgLayerKernel() = default;
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: All
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] stride Stride to be used during data re-organization.
+ * It defines the spatial distance between 2 consecutive pixels in the x and y direction
+ */
+ void configure(const ITensor *input, ITensor *output, int32_t stride);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEReorgLayerKernel
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] stride Stride to be used during data re-organization
+ * It defines the spatial distance between 2 consecutive pixels in the x and y direction
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ int32_t _stride;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEREORGLAYERKERNEL_H */
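
A short wiring sketch, assuming the usual Tensor workflow and an NCHW layout: a stride-2 reorg halves the spatial dimensions and multiplies the channel count by stride * stride, so a 26x26x64 input maps to 13x13x256. The shapes are illustrative.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEReorgLayerKernel.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(26U, 26U, 64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(13U, 13U, 256U), 1, DataType::F32));

    NEReorgLayerKernel reorg;
    ARM_COMPUTE_ERROR_THROW_ON(NEReorgLayerKernel::validate(src.info(), dst.info(), 2));
    reorg.configure(&src, &dst, 2);
    return 0;
}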
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
+#include "src/core/NEON/kernels/NEReshapeLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NERESHAPELAYERKERNEL_H
+#define ARM_COMPUTE_NERESHAPELAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the kernel to perform tensor reshaping */
+class NEReshapeLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEReshapeLayerKernel";
+ }
+ /** Default constructor */
+ NEReshapeLayerKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReshapeLayerKernel(const NEReshapeLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReshapeLayerKernel &operator=(const NEReshapeLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEReshapeLayerKernel(NEReshapeLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEReshapeLayerKernel &operator=(NEReshapeLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEReshapeLayerKernel() = default;
+ /** Set the input and output info of the kernel
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ */
+ void configure(const ITensorInfo *input, ITensorInfo *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */
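
This kernel follows the operator-style workflow: configure() only receives ITensorInfo metadata and the actual tensors are bound at run time through an ITensorPack, as the run_op() override above shows. The sketch below assumes the ITensorPack/TensorType API from this release and calls run_op() directly (single-threaded) instead of going through the scheduler, purely for brevity.

#include "arm_compute/core/CPP/CPPTypes.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEReshapeLayerKernel.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32)); // same number of elements

    NEReshapeLayerKernel reshape;
    reshape.configure(src.info(), dst.info()); // only metadata at configure time

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // Tensors are bound at run time through a pack keyed by ACL_SRC / ACL_DST.
    ITensorPack pack;
    pack.add_const_tensor(TensorType::ACL_SRC, &src);
    pack.add_tensor(TensorType::ACL_DST, &dst);

    reshape.run_op(pack, reshape.window(), ThreadInfo{});
    return 0;
}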
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEReverseKernel.h"
+#include "src/core/NEON/kernels/NEReverseKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEREVERSEKERNEL_H
+#define ARM_COMPUTE_NEREVERSEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the reverse layer kernel. */
+class NEReverseKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEReverseKernel";
+ }
+ /** Default constructor */
+ NEReverseKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReverseKernel(const NEReverseKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReverseKernel &operator=(const NEReverseKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEReverseKernel(NEReverseKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEReverseKernel &operator=(NEReverseKernel &&) = default;
+ /** Default destructor */
+ ~NEReverseKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
+ */
+ void configure(const ITensor *input, ITensor *output, const ITensor *axis);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] output Output tensor info. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ const ITensor *_axis;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEREVERSEKERNEL_H */
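
A usage sketch, assuming the usual Tensor/NEScheduler workflow: reverse a 2-D tensor along dimension 0. The axis tensor holds the U32 indices of the dimensions to flip, so it is filled through the tensor's buffer after allocation; shapes are illustrative.

#include <cstdint>

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEReverseKernel.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst, axis;
    src.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 8U), 1, DataType::F32));
    axis.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::U32));

    NEReverseKernel reverse;
    reverse.configure(&src, &dst, &axis);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    axis.allocator()->allocate();

    // Flip dimension 0 (the innermost dimension).
    reinterpret_cast<uint32_t *>(axis.buffer())[0] = 0;

    NEScheduler::get().schedule(&reverse, Window::DimY);
    return 0;
}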
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
+#include "src/core/NEON/kernels/NEScaleKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESCALEKERNEL_H
+#define ARM_COMPUTE_NESCALEKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform scaling on a tensor */
+class NEScaleKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEScaleKernel";
+ }
+ /** Default constructor */
+ NEScaleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEScaleKernel(const NEScaleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEScaleKernel &operator=(const NEScaleKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEScaleKernel(NEScaleKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEScaleKernel &operator=(NEScaleKernel &&) = default;
+ /** Default destructor */
+ ~NEScaleKernel() = default;
+
+ /** Initialise the kernel's inputs, output and interpolation policy
+ *
+ * @note dx, dy and offsets have the same dimensions (width and height) as the output tensor
+ * @note The Area interpolation policy only supports the NCHW data layout and U8 input data type.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
+ * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
+ * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
+ * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] info @ref ScaleKernelInfo to use for configuration
+ */
+ void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output,
+ const ScaleKernelInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEScaleKernel
+ *
+ * @note dx, dy and offsets have the same dimensions (width and height) as the output tensor
+ * @note The Area interpolation policy only supports the NCHW data layout and U8 input data type.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
+ * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
+ * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
+ * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
+ * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] info @ref ScaleKernelInfo to use for validation
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *output,
+ const ScaleKernelInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** function to perform scale using area interpolation on the given window
+ *
+ * @note Used only in case down-sampling.
+ */
+ void scale_area_nchw_u8(const Window &window);
+
+ /** function to perform scale using bilinear interpolation on the given window */
+ template <typename T>
+ void scale_bilinear_nchw(const Window &window);
+ /** function to perform scale using bilinear interpolation on the given window */
+ template <typename T>
+ void scale_bilinear_nhwc(const Window &window);
+ /** function to perform scale using bilinear interpolation on the given window */
+ template <typename T>
+ void scale_bilinear_qasymm(const Window &window);
+
+ /** function to perform scale using nearest neighbour on the given window */
+ template <typename T>
+ void scale_nearest_nchw(const Window &window);
+ /** function to perform scale using nearest neighbour on the given window */
+ template <typename T>
+ void scale_nearest_nhwc(const Window &window);
+
+ /** Scale function to use for the particular function to use */
+ using ScaleFunctionPtr = void (NEScaleKernel::*)(const Window &window);
+
+ ScaleFunctionPtr _func;
+ const ITensor *_offsets;
+ const ITensor *_dx;
+ const ITensor *_dy;
+ const ITensor *_input;
+ ITensor *_output;
+ InterpolationPolicy _policy;
+ BorderMode _border_mode;
+ PixelValue _constant_border_value;
+ float _sampling_offset;
+ bool _align_corners;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NESCALEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h"
+#include "src/core/NEON/kernels/NEScharr3x3Kernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESCHARR3x3KERNEL_H
+#define ARM_COMPUTE_NESCHARR3x3KERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run a 3x3 Scharr filter on a tensor.
+ *
+ * @f[
+ * \mathbf{G}_x=\begin{vmatrix}
+ * -3 & 0 & +3\\
+ * -10 & 0 & +10\\
+ * -3 & 0 & +3
+ * \end{vmatrix}
+ * @f]
+ */
+class NEScharr3x3Kernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEScharr3x3Kernel";
+ }
+ /** Default constructor */
+ NEScharr3x3Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default;
+ /** Default destructor */
+ ~NEScharr3x3Kernel() = default;
+
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ bool _run_scharr_x; /**< Do we need to run Scharr X ? */
+ bool _run_scharr_y; /**< Do we need to run Scharr Y ? */
+ const ITensor *_input; /**< Input tensor */
+ ITensor *_output_x; /**< Output tensor for scharr X */
+ ITensor *_output_y; /**< Output tensor for scharr Y */
+};
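+
+// Illustrative usage sketch (the tensor names and the scheduling call are assumptions about
+// typical use, not part of this header): with pre-allocated tensors of the documented types,
+// the kernel is configured once and then handed to the runtime scheduler.
+//
+//   NEScharr3x3Kernel scharr;
+//   scharr.configure(&src_u8, &grad_x_s16, &grad_y_s16, false /* border_undefined */);
+//   NEScheduler::get().schedule(&scharr, Window::DimY);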
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NESCHARR3x3KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NESelectKernel.h"
+#include "src/core/NEON/kernels/NESelectKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESELECTKERNEL_H
+#define ARM_COMPUTE_NESELECTKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the select kernel
+ *
+ * Select is computed by:
+ * @f[ output(i) = condition(i) ? x(i) : y(i) @f]
+ *
+ */
+class NESelectKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESelectKernel";
+ }
+ /** Default constructor */
+ NESelectKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESelectKernel(const NESelectKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESelectKernel &operator=(const NESelectKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESelectKernel(NESelectKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESelectKernel &operator=(NESelectKernel &&) = default;
+ /** Default destructor */
+ ~NESelectKernel() = default;
+
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] c Condition input tensor. Data types supported: U8.
+ * @param[in] x First input tensor. Data types supported: All.
+ * @param[in] y Second input tensor. Data types supported: Same as @p x
+ * @param[out] output Output tensor. Data types supported: Same as @p x.
+ */
+ void configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output);
+
+ /** Validate the arguments passed to the kernel
+ *
+ * @param[in] c Condition input tensor. Data types supported: U8.
+ * @param[in] x First input tensor. Data types supported: All.
+ * @param[in] y Second input tensor. Data types supported: Same as @p x
+ * @param[in] output Output tensor. Data types supported: Same as @p x.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialised select functions
+ *
+ * @param[in] c Condition input tensor. Data types supported: U8.
+ * @param[in] x First input tensor. Data types supported: All.
+ * @param[in] y Second input tensor. Data types supported: Same as @p x
+ * @param[in] output Output tensor. Data types supported: Same as @p x.
+ */
+ using SelectFunction = void(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window);
+
+ /** Select function to use for the particular tensor types passed to configure() */
+ SelectFunction *_function;
+ const ITensor *_c; /**< Condition tensor */
+ const ITensor *_x; /**< Source tensor 1 */
+ const ITensor *_y; /**< Source tensor 2 */
+ ITensor *_output; /**< Destination tensor */
+ bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */
+};
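+
+// Illustrative usage sketch (assumed typical call order; tensor names and the scheduling call
+// are placeholders, not prescribed by this header):
+//
+//   NESelectKernel select;
+//   select.configure(&condition_u8, &x, &y, &output); // output(i) = condition(i) ? x(i) : y(i)
+//   NEScheduler::get().schedule(&select, Window::DimY);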
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NESELECTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h"
+#include "src/core/NEON/kernels/NESobel3x3Kernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESOBEL3x3KERNEL_H
+#define ARM_COMPUTE_NESOBEL3x3KERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor.
+ *
+ * @f[
+ * \mathbf{G}_x=\begin{vmatrix}
+ * -1 & 0 & +1\\
+ * -2 & 0 & +2\\
+ * -1 & 0 & +1
+ * \end{vmatrix}
+ * @f]
+ */
+class NESobel3x3Kernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESobel3x3Kernel";
+ }
+ /** Default constructor */
+ NESobel3x3Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel3x3Kernel(const NESobel3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESobel3x3Kernel(NESobel3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default;
+ /** Default destructor */
+ ~NESobel3x3Kernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ bool _run_sobel_x; /**< Do we need to run Sobel X ? */
+ bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
+ const ITensor *_input; /**< Input tensor */
+ ITensor *_output_x; /**< Output tensor for sobel X */
+ ITensor *_output_y; /**< Output tensor for sobel Y */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NESOBEL3x3KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h"
+#include "src/core/NEON/kernels/NESobel5x5Kernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESOBEL5x5KERNEL_H
+#define ARM_COMPUTE_NESOBEL5x5KERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor.
+ *
+ */
+class NESobel5x5HorKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESobel5x5HorKernel";
+ }
+ /** Default constructor */
+ NESobel5x5HorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default;
+ /** Default destructor */
+ ~NESobel5x5HorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @note At least one of output_x or output_y must be set
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ const ITensor *_input; /**< Input tensor */
+ ITensor *_output_x; /**< X output of horizontal pass */
+ ITensor *_output_y; /**< Y output of horizontal pass */
+ bool _run_sobel_x; /**< Do we need to run Sobel X? */
+ bool _run_sobel_y; /**< Do we need to run Sobel Y? */
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel to run the vertical pass of the 5x5 Sobel filter on a tensor.
+ *
+ */
+class NESobel5x5VertKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESobel5x5VertKernel";
+ }
+ /** Default constructor */
+ NESobel5x5VertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default;
+ /** Default destructor */
+ ~NESobel5x5VertKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16.
+ * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16.
+ * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16.
+ * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ ITensor *_input_x; /**< X input (X output of the hor pass) */
+ ITensor *_input_y; /**< Y input (Y output of the hor pass) */
+ ITensor *_output_x; /**< X output of sobel */
+ ITensor *_output_y; /**< Y output of sobel */
+ bool _run_sobel_x; /**< Do we need to run sobel X? */
+ bool _run_sobel_y; /**< Do we need to run sobel Y? */
+};
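+
+// Illustrative two-pass usage sketch (assumed call order; tensor names and the scheduling calls
+// are placeholders): the horizontal pass produces the intermediate S16 tensors that feed the
+// vertical pass.
+//
+//   NESobel5x5HorKernel  hor;
+//   NESobel5x5VertKernel vert;
+//   hor.configure(&src_u8, &tmp_x_s16, &tmp_y_s16, false /* border_undefined */);
+//   vert.configure(&tmp_x_s16, &tmp_y_s16, &grad_x_s16, &grad_y_s16, false /* border_undefined */);
+//   NEScheduler::get().schedule(&hor, Window::DimY);
+//   NEScheduler::get().schedule(&vert, Window::DimY);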
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NESOBEL5x5KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h"
+#include "src/core/NEON/kernels/NESobel7x7Kernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESOBEL7x7KERNEL_H
+#define ARM_COMPUTE_NESOBEL7x7KERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor.
+ *
+ */
+class NESobel7x7HorKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESobel7x7HorKernel";
+ }
+ /** Default constructor */
+ NESobel7x7HorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default;
+ /** Default destructor */
+ ~NESobel7x7HorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ const ITensor *_input; /**< Input tensor */
+ ITensor *_output_x; /**< X output of horizontal pass */
+ ITensor *_output_y; /**< Y output of horizontal pass */
+ bool _run_sobel_x; /**< Do we need to run Sobel X? */
+ bool _run_sobel_y; /**< Do we need to run Sobel Y? */
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel to run the vertical pass of the 7x7 Sobel filter on a tensor.
+ *
+ */
+class NESobel7x7VertKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESobel7x7VertKernel";
+ }
+ /** Default constructor */
+ NESobel7x7VertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default;
+ /** Default destructor */
+ ~NESobel7x7VertKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @note At least one of output_x or output_y must be set
+ * @note If output_x is set then input_x must be set too
+ * @note If output_y is set then input_y must be set too
+ *
+ * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32.
+ * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ const ITensor *_input_x; /**< X input (X output of the hor pass) */
+ const ITensor *_input_y; /**< Y input (Y output of the hor pass) */
+ ITensor *_output_x; /**< X output of sobel */
+ ITensor *_output_y; /**< Y output of sobel */
+ bool _run_sobel_x; /**< Do we need to run sobel X? */
+ bool _run_sobel_y; /**< Do we need to run sobel Y? */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NESOBEL7x7KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
+#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H
+#define ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for identifying the max value of 1D Logits */
+class NELogits1DMaxKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NELogits1DMaxKernel";
+ }
+ /** Default constructor */
+ NELogits1DMaxKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogits1DMaxKernel(const NELogits1DMaxKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogits1DMaxKernel &operator=(const NELogits1DMaxKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NELogits1DMaxKernel(NELogits1DMaxKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NELogits1DMaxKernel &operator=(NELogits1DMaxKernel &&) = default;
+ /** Default destructor */
+ ~NELogits1DMaxKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] output Destination tensor. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ using Logits1DMaxFunction = void(const ITensor &in, ITensor &out, const Window &window);
+
+private:
+ Logits1DMaxFunction *_func;
+ BorderSize _border_size;
+};
+
+/** Interface for softmax (and log softmax) computation with a pre-computed max value. */
+template <bool IS_LOG = false>
+class NELogits1DSoftmaxKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ if(IS_LOG)
+ {
+ return "NELogits1DLogSoftmaxKernel";
+ }
+ else
+ {
+ return "NELogits1DSoftmaxKernel";
+ }
+ }
+ /** Default constructor */
+ NELogits1DSoftmaxKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default;
+ /** Default destructor */
+ ~NELogits1DSoftmaxKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1.
+ * Data types supported: same as @p input.
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] beta A scaling factor for the exponent.
+ * @param tmp Auxiliary tensor. Must be of type F32 and of the same shape as the input.
+ */
+ void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp);
+ /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1.
+ * Data types supported: same as @p input.
+ * @param[in] output Destination tensor info. Data types supported: same as @p input.
+ * @param[in] beta A scaling factor for the exponent.
+ * @param[in] tmp Tensor info of the auxiliary tensor. Must be of type F32 and of the same shape as the input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *max,
+ const ITensorInfo *output, const float beta, const ITensorInfo *tmp);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ using LogitsSoftmaxFunction = void(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta,
+ const Window &window);
+
+ LogitsSoftmaxFunction *_func;
+ const ITensor *_input;
+ const ITensor *_max;
+ ITensor *_output;
+ float _beta;
+ ITensor *_tmp; //Temporary. Used internally
+};
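+
+// Illustrative two-stage usage sketch (tensor names, the beta value and the scheduling calls are
+// assumptions about typical use): the row-wise max is computed first and then fed, together with
+// an F32 auxiliary buffer, to the softmax kernel.
+//
+//   NELogits1DMaxKernel       max_kernel;
+//   NELogits1DSoftmaxKernel<> softmax_kernel; // IS_LOG = false
+//   max_kernel.configure(&input, &max);
+//   softmax_kernel.configure(&input, &max, &output, 1.0f /* beta */, &tmp_f32);
+//   NEScheduler::get().schedule(&max_kernel, Window::DimY);
+//   NEScheduler::get().schedule(&softmax_kernel, Window::DimY);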
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
+#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H
+#define ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declaration
+class ITensor;
+
+/** Interface for the space to batch kernel */
+class NESpaceToBatchLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESpaceToBatchLayerKernel";
+ }
+ /** Default constructor */
+ NESpaceToBatchLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESpaceToBatchLayerKernel(const NESpaceToBatchLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESpaceToBatchLayerKernel &operator=(const NESpaceToBatchLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESpaceToBatchLayerKernel(NESpaceToBatchLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESpaceToBatchLayerKernel &operator=(NESpaceToBatchLayerKernel &&) = default;
+ /** Default destructor */
+ ~NESpaceToBatchLayerKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output);
+ /** Initialise the kernel's input and output. (Static block shape and paddings)
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[in] padding_left The left padding of the output tensor.
+ * @param[in] padding_right The right padding of the output tensor.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
+ * @param[in] output Tensor output. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings)
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[in] padding_left The left padding of the output tensor.
+ * @param[in] padding_right The right padding of the output tensor.
+ * @param[in] output Tensor output. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input; /**< Source tensor */
+ const ITensor *_block_shape; /**< Block shape tensor */
+ const ITensor *_paddings; /**< Paddings tensor */
+ ITensor *_output; /**< Destination tensor */
+ DataLayout _data_layout; /**< Data layout to be used at run-time */
+
+ Size2D _padding_left;
+ int _block_shape_x;
+ int _block_shape_y;
+};
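+
+// Illustrative usage sketch with a static block shape and paddings (tensor names, values and the
+// scheduling call are placeholders, not prescribed by this header):
+//
+//   NESpaceToBatchLayerKernel s2b;
+//   s2b.configure(&input, 2 /* block_shape_x */, 2 /* block_shape_y */,
+//                 Size2D(0, 0) /* padding_left */, Size2D(0, 0) /* padding_right */, &output);
+//   NEScheduler::get().schedule(&s2b, Window::DimY);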
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
+#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H
+#define ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the space to depth kernel */
+class NESpaceToDepthLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NESpaceToDepthLayerKernel";
+ }
+ /** Default constructor */
+ NESpaceToDepthLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESpaceToDepthLayerKernel(const NESpaceToDepthLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESpaceToDepthLayerKernel &operator=(const NESpaceToDepthLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NESpaceToDepthLayerKernel(NESpaceToDepthLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NESpaceToDepthLayerKernel &operator=(NESpaceToDepthLayerKernel &&) = default;
+ /** Default destructor */
+ ~NESpaceToDepthLayerKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value
+ */
+ void configure(const ITensor *input, ITensor *output, int32_t block_shape);
+ /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel
+ *
+ * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] output Tensor output info. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input; /**< Source tensor */
+ ITensor *_output; /**< Destination tensor */
+ int32_t _block_shape; /**< Block shape */
+ DataLayout _data_layout; /**< Data layout of the operation */
+};
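+
+// Shape example (illustrative): with block_shape = 2, each spatial dimension of the input is
+// divided by 2 and the channel dimension is multiplied by 4, e.g. an 8x8x3 (WxHxC) input becomes
+// a 4x4x12 output; the batch dimension is unchanged.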
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h"
+#include "src/core/NEON/kernels/NEStackLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H
+#define ARM_COMPUTE_NESTACKLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to stack a rank-R tensor into one with rank-(R+1) along the axis dimension. */
+class NEStackLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEStackLayerKernel";
+ }
+ /** Default constructor */
+ NEStackLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStackLayerKernel(const NEStackLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStackLayerKernel &operator=(const NEStackLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEStackLayerKernel(NEStackLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEStackLayerKernel &operator=(NEStackLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEStackLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @note Supported input tensor rank: up to 4
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
+ * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
+ * All tensors in the list must have the same shape
+ * @param[in] num_tensors Number of tensors to stack
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel
+ *
+ * @note Supported input tensor rank: up to 4
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
+ * @param[in] idx_input Index of the input tensor in the list of tensors to stack
+ * All tensors in the list must have the same shape
+ * @param[in] num_tensors Number of tensors to stack
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ unsigned int _axis;
+ unsigned int _idx_input;
+};
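+
+// Illustrative usage sketch (an assumption about typical use): one kernel instance is configured
+// per input tensor, where `inputs` is a std::vector<ITensor *> of identically shaped tensors and
+// `output` already has the stacked shape.
+//
+//   std::vector<NEStackLayerKernel> kernels(inputs.size());
+//   for(unsigned int i = 0; i < inputs.size(); ++i)
+//   {
+//       kernels[i].configure(inputs[i], axis, i, inputs.size(), &output);
+//   }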
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
+#include "src/core/NEON/kernels/NEStridedSliceKernel.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H
+#define ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the kernel to perform tensor strided slicing */
+class NEStridedSliceKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEStridedSliceKernel";
+ }
+ /** Default constructor */
+ NEStridedSliceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStridedSliceKernel(const NEStridedSliceKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStridedSliceKernel &operator=(const NEStridedSliceKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEStridedSliceKernel(NEStridedSliceKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEStridedSliceKernel &operator=(NEStridedSliceKernel &&) = default;
+ /** Default destructor */
+ ~NEStridedSliceKernel() = default;
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+ * A slice of size 1 starting from starts[i] in the dimension must be preserved.
+ */
+ void configure(const ITensorInfo *input, ITensorInfo *output,
+ const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+ int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
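+ // Worked example (illustrative values, not prescribed by this header): on a 2-D input of shape [4, 4],
+ //   configure(in_info, out_info, Coordinates(1, 0), Coordinates(3, 4), BiStrides(1, 2), 0, 0, 0)
+ // selects elements 1..2 along dimension 0 and every second element along dimension 1; setting bit 0
+ // of begin_mask would instead ignore starts[0] and begin dimension 0 at index 0.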
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+ * A slice of size 1 starting from starts[i] in the dimension must be preserved.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+ int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ Coordinates _starts_abs; /**< Absolute start coordinates */
+ Coordinates _final_strides; /**< Final strides */
+ int32_t _shrink_mask; /**< Shrink axis mask */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H */
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
+#include "src/core/NEON/kernels/NETableLookupKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NETABLELOOKUPKERNEL_H
+#define ARM_COMPUTE_NETABLELOOKUPKERNEL_H
+
+#include "src/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+class ILut;
+
+/** Interface for the kernel to perform table lookup calculations. */
+class NETableLookupKernel : public INESimpleKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NETableLookupKernel";
+ }
+ /** Default constructor */
+ NETableLookupKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETableLookupKernel(const NETableLookupKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETableLookupKernel &operator=(const NETableLookupKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NETableLookupKernel(NETableLookupKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NETableLookupKernel &operator=(NETableLookupKernel &&) = default;
+ /** Default destructor */
+ ~NETableLookupKernel() = default;
+ /** Initialise the kernel's input, lut and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8/S16.
+ * @param[in] lut The input LUT.
+ * @param[out] output The output tensor. Data types supported: same as @p input
+ */
+ void configure(const ITensor *input, const ILut *lut, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Perform table lookup on a given window.
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <class T>
+ void tableLookup(const Window &window);
+ /** Common signature for all the specialised lut functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window);
+ /** Sub function to use for the particular tensor types passed to configure() */
+ TableLookupFunction _func;
+ const ILut *_lut;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NETABLELOOKUPKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h"
+#include "src/core/NEON/kernels/NEThresholdKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H
+#define ARM_COMPUTE_NETHRESHOLDKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the thresholding kernel */
+class NEThresholdKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEThresholdKernel";
+ }
+ /** Default constructor */
+ NEThresholdKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEThresholdKernel(const NEThresholdKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEThresholdKernel &operator=(const NEThresholdKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEThresholdKernel(NEThresholdKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEThresholdKernel &operator=(NEThresholdKernel &&) = default;
+ /** Default destructor */
+ ~NEThresholdKernel() = default;
+ /** Initialise the kernel's input, output and threshold parameters.
+ *
+ * @param[in] input An input tensor. Data type supported: U8
+ * @param[out] output The output tensor. Data type supported: U8.
+ * @param[in] info Threshold kernel descriptor
+ */
+ void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEThresholdKernel
+ *
+ * @param[in] input Input tensor info. Data type supported: U8
+ * @param[in] output Output tensor info. Data type supported: U8
+ * @param[in] info Threshold kernel descriptor
+ *
+ * @return A status containing an error code in case of failure
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** run binary thresholding on the given window */
+ void run_binary(const Window &window);
+ /** run range thresholding on the given window */
+ void run_range(const Window &window);
+
+ void (NEThresholdKernel::*_func)(const Window &window);
+
+ const ITensor *_input; /**< Input */
+ ITensor *_output; /**< Output */
+ ThresholdKernelInfo _info; /**< Threshold descriptor */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NETileKernel.h"
+#include "src/core/NEON/kernels/NETileKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NETILEKERNEL_H
+#define ARM_COMPUTE_NETILEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a tile operation */
+class NETileKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NETileKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NETileKernel(const NETileKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NETileKernel &operator=(const NETileKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NETileKernel(NETileKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NETileKernel &operator=(NETileKernel &&) = default;
+ /** Default destructor */
+ ~NETileKernel() = default;
+ const char *name() const override
+ {
+ return "NETileKernel";
+ }
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: All.
+     * @param[out] output    Destination tensor. Data type supported: Same as @p input.
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ */
+ void configure(const ITensor *input, ITensor *output, const Multiples &multiples);
+ /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel
+ *
+ * @param[in] input Source tensor info. Data type supported: All.
+     * @param[in]  output    Destination tensor info. Data type supported: Same as @p input.
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NETILEKERNEL_H */
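// Illustrative sketch (not one of the patch hunks): validating a tile that repeats a
// [2, 3] tensor twice along the x-axis. Assumption: Multiples is the vector-of-replication-
// factors alias used by the tile operator, so { 2, 1 } reads "2x along dimension 0,
// 1x along dimension 1", and the destination shape grows accordingly.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "src/core/NEON/kernels/NETileKernel.h"

arm_compute::Status tile_validate_sketch()
{
    using namespace arm_compute;

    const TensorInfo src(TensorShape(2U, 3U), 1, DataType::F32);
    const TensorInfo dst(TensorShape(4U, 3U), 1, DataType::F32); // x dimension replicated twice

    const Multiples multiples = { 2U, 1U };
    return NETileKernel::validate(&src, &dst, multiples);
}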
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NETRANSPOSEKERNEL_H
+#define ARM_COMPUTE_NETRANSPOSEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel which transposes the elements of a matrix.
+ *
+ * [width, height, batch] -> [height, width, batch]
+ *
+ */
+class NETransposeKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NETransposeKernel";
+ }
+ /** Default constructor */
+ NETransposeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETransposeKernel(const NETransposeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETransposeKernel &operator=(const NETransposeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NETransposeKernel(NETransposeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NETransposeKernel &operator=(NETransposeKernel &&) = default;
+ /** Default destructor */
+ ~NETransposeKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[in] output Output tensor. Data type supported: Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the transpose functions
+ *
+ * @param[in] input An input tensor. Data types supported: All
+ * @param[out] output The output tensor. Data type supported: same as @p input
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window);
+ /** Transpose function to use for the particular tensor types passed to configure() */
+ TransposeFunction *_func;
+ const ITensor *_input;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NETRANSPOSEKERNEL_H */
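// Schematic of the function-pointer dispatch declared in the private section above (not
// the kernel's actual implementation): one specialised free function is selected at
// configure() time and called through the stored pointer on every run(), so the hot path
// stays branch-free. All names below are hypothetical.
#include <cstddef>

namespace sketch
{
struct Window; // stand-in for arm_compute::Window

using TransposeFunction = void(const void *in, void *out, const Window &window);

void transpose_8bit(const void *, void *, const Window &)  { /* 8-bit specialisation  */ }
void transpose_32bit(const void *, void *, const Window &) { /* 32-bit specialisation */ }

struct TransposeDispatcher
{
    TransposeFunction *func = nullptr;

    void configure(std::size_t element_size)
    {
        // Bind the specialisation once, e.g. on element size
        func = (element_size == 1) ? &transpose_8bit : &transpose_32bit;
    }

    void run(const void *in, void *out, const Window &window) const
    {
        (*func)(in, out, window);
    }
};
} // namespace sketch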
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h"
+#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H
+#define ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the Upsample layer kernel.*/
+class NEUpsampleLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEUpsampleLayerKernel";
+ }
+ /** Default constructor */
+ NEUpsampleLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEUpsampleLayerKernel(const NEUpsampleLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEUpsampleLayerKernel &operator=(const NEUpsampleLayerKernel &) = delete;
+    /** Default move constructor */
+ NEUpsampleLayerKernel(NEUpsampleLayerKernel &&) = default;
+ /** Default move assignment operator */
+ NEUpsampleLayerKernel &operator=(NEUpsampleLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEUpsampleLayerKernel() = default;
+ /** Set the input output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] info Contains stride information described in @ref Size2D.
+ * @param[in] policy Defines the policy to fill the intermediate pixels.
+ *
+ */
+ void configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy policy);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: same as @p input.
+ * @param[in] info Contains stride information described in @ref Size2D.
+ * @param[in] policy Defines the policy to fill the intermediate pixels.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy policy);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Function to run upsample layer (NCHW)
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T, int S>
+ void upsample_nchw(const Window &window);
+ /** Function to run upsample layer (NHWC)
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T, int S>
+ void upsample_nhwc(const Window &window);
+
+ using UpsampleFunctionPtr = void (NEUpsampleLayerKernel::*)(const Window &window);
+
+private:
+ UpsampleFunctionPtr _func;
+ const ITensor *_input;
+ ITensor *_output;
+ Size2D _info;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H */
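// Illustrative sketch (not one of the patch hunks): validating a 2x2 nearest-neighbour
// upsample. Only the configure()/validate() signatures come from the header above; the
// doubling of the destination width and height is an assumption about how the stride
// information maps to the output size.
#include "arm_compute/core/TensorInfo.h"
#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h"

arm_compute::Status upsample_validate_sketch()
{
    using namespace arm_compute;

    const TensorInfo src(TensorShape(16U, 16U, 8U), 1, DataType::F32);
    const TensorInfo dst(TensorShape(32U, 32U, 8U), 1, DataType::F32); // assumed: W and H scaled by the strides

    return NEUpsampleLayerKernel::validate(&src, &dst, Size2D(2U, 2U), InterpolationPolicy::NEAREST_NEIGHBOR);
}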
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
+#include "src/core/NEON/kernels/NEWarpKernel.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEWARPKERNEL_H
+#define ARM_COMPUTE_NEWARPKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+#include <array>
+#include <cstdint>
+namespace arm_compute
+{
+class ITensor;
+
+/** Common interface for warp affine and warp perspective */
+class INEWarpKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ INEWarpKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INEWarpKernel(const INEWarpKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ INEWarpKernel &operator=(const INEWarpKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ INEWarpKernel(INEWarpKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ INEWarpKernel &operator=(INEWarpKernel &&) = default;
+ /** Default destructor */
+ ~INEWarpKernel() = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output Destination tensor. Data type supported: U8.
+     * @param[in]  matrix                The perspective or affine matrix to use. Must be 2x3 of type float for affine and 3x3 of type float for perspective.
+     *                                   The matrix argument always holds 9 values; for the affine case the last 3 values are ignored.
+ * @param[in] border_mode Strategy to use for borders
+ * @param[in] constant_border_value Constant value used for filling the border.
+ */
+ virtual void configure(const ITensor *input, ITensor *output, const std::array<float, 9> &matrix, BorderMode border_mode, uint8_t constant_border_value);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+    BorderSize border_size() const override;
+
+protected:
+ /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ virtual void warp_undefined(const Window &window) = 0;
+ /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ virtual void warp_constant(const Window &window) = 0;
+ /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ virtual void warp_replicate(const Window &window) = 0;
+ /** Common signature for all the specialised warp functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ void (INEWarpKernel::*_func)(const Window &window);
+
+ const ITensor *_input; /**< Input Tensor */
+ ITensor *_output; /**< Output Tensor */
+ uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */
+ std::array<float, 9> _matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. */
+};
+
+/** Template interface for the kernel to compute warp affine
+ *
+ */
+template <InterpolationPolicy interpolation>
+class NEWarpAffineKernel : public INEWarpKernel
+{
+private:
+ const char *name() const override
+ {
+ return "NEWarpAffineKernel";
+ }
+ // Inherited methods overridden:
+ void warp_undefined(const Window &window) override;
+ void warp_constant(const Window &window) override;
+ void warp_replicate(const Window &window) override;
+};
+
+/** Template interface for the kernel to compute warp perspective
+ *
+ */
+template <InterpolationPolicy interpolation>
+class NEWarpPerspectiveKernel : public INEWarpKernel
+{
+private:
+ const char *name() const override
+ {
+ return "NEWarpPerspectiveKernel";
+ }
+ // Inherited methods overridden:
+ void warp_undefined(const Window &window) override;
+ void warp_constant(const Window &window) override;
+ void warp_replicate(const Window &window) override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEWARPKERNEL_H */
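// Illustrative sketch (not one of the patch hunks): configuring the affine variant. The
// matrix is always passed as 9 floats; only the first 6 carry the 2x3 affine coefficients
// and the rest are ignored, as documented above. Coefficient values and their ordering are
// deliberately left as placeholders rather than assumed.
#include <array>

#include "arm_compute/core/ITensor.h"
#include "src/core/NEON/kernels/NEWarpKernel.h"

void warp_affine_kernel_sketch(arm_compute::ITensor *src, arm_compute::ITensor *dst)
{
    using namespace arm_compute;

    std::array<float, 9> matrix{}; // fill the first 6 entries with the 2x3 affine coefficients

    NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR> kernel;
    kernel.configure(src, dst, matrix, BorderMode::UNDEFINED, 0 /* constant border value, unused for UNDEFINED */);
}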
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H
+#define ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer
+ *
+ * Rearranges each 3-dimensional kernel into a single row, producing a matrix of linearized kernels.
+ * In combination with @ref NEIm2ColKernel, this transforms a convolution into a matrix multiplication.
+ *
+ * For example, assuming a 3D weight kernel of 3x3 dimensions and a depth of 2, we have:
+ * @f[
+ * \left( \begin{array}{ccc}
+ * a000 & a001 & a002 \\
+ * a010 & a011 & a012 \\
+ * a020 & a021 & a022 \\
+ * \end{array} \right)
+ * \left( \begin{array}{ccc}
+ * a100 & a101 & a102 \\
+ * a110 & a111 & a112 \\
+ * a120 & a121 & a122 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccccc}
+ * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
+ * \end{array} \right)
+ * @f]
+ */
+class NEWeightsReshapeKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEWeightsReshapeKernel";
+ }
+ /** Constructor.*/
+ NEWeightsReshapeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default;
+ /** Default destructor */
+ ~NEWeightsReshapeKernel() = default;
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
+ * Data types supported: All
+ * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+ * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+     * @warning Appending biases to the reshaped weights matrix is not supported for quantized asymmetric types.
+ * @param[out] output The output tensor. Data types supported: Same as @p input
+ */
+ void configure(const ITensor *input, const ITensor *bias, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel
+ *
+ * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
+ * Data types supported: All
+ * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+ * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+     * @warning Appending biases to the reshaped weights matrix is not supported for quantized asymmetric types.
+ * @param[in] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H */
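// Worked example for the reshaping formula above (not one of the patch hunks): for weights
// of shape [kernel_x = 3, kernel_y = 3, IFM = 2, OFM = 4], each kernel flattens to
// 3 * 3 * 2 = 18 values, one linearized kernel per output feature map, with one extra value
// per kernel when a bias is appended. The shape-calculator helper is assumed to be the one
// the library uses for this layout; the exact axis ordering of the result is left to it.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"

arm_compute::TensorShape reshaped_weights_shape_sketch()
{
    using namespace arm_compute;

    const TensorInfo weights(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32);
    // 18 weights per kernel, 4 kernels, no bias appended
    return misc::shape_calculator::compute_weights_reshaped_shape(weights, /* has_bias = */ false);
}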
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H
+#define ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the width concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class NEWidthConcatenateLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEWidthConcatenateLayerKernel";
+ }
+ /** Default constructor */
+ NEWidthConcatenateLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWidthConcatenateLayerKernel(const NEWidthConcatenateLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEWidthConcatenateLayerKernel &operator=(const NEWidthConcatenateLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEWidthConcatenateLayerKernel(NEWidthConcatenateLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEWidthConcatenateLayerKernel &operator=(NEWidthConcatenateLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEWidthConcatenateLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] width_offset The offset on the X axis.
+ * @param[in,out] output Output tensor info. Data types supported: Same as @p input.
+ */
+ void configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] width_offset The offset on the X axis.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ unsigned int _width_offset;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */
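// Illustrative sketch (not one of the patch hunks): this kernel follows the operator-style
// interface, so it is configured on ITensorInfo objects and receives the actual tensors
// through an ITensorPack at run time. The ACL_SRC/ACL_DST pack ids and the direct run_op()
// call with a default ThreadInfo are assumptions for illustration; inside the library the
// scheduler performs this dispatch.
#include "arm_compute/core/CPP/CPPTypes.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/experimental/Types.h"
#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"

void width_concat_kernel_sketch(arm_compute::ITensor &src, arm_compute::ITensor &dst, unsigned int width_offset)
{
    using namespace arm_compute;

    ARM_COMPUTE_ERROR_THROW_ON(NEWidthConcatenateLayerKernel::validate(src.info(), width_offset, dst.info()));

    NEWidthConcatenateLayerKernel kernel;
    kernel.configure(src.info(), width_offset, dst.info()); // stateless: only the width offset is kept

    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC, &src);
    pack.add_tensor(TensorType::ACL_DST, &dst);
    kernel.run_op(pack, kernel.window(), ThreadInfo{});
}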
#ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H
#define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/NEON/kernels/convolution/common/convolution.hpp"
#include "src/core/NEON/kernels/convolution/common/tensor.hpp"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
+#include "src/core/NEON/kernels/NEYOLOLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEYOLOLAYERKERNEL_H
+#define ARM_COMPUTE_NEYOLOLAYERKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the YOLO layer kernel. */
+class NEYOLOLayerKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NEYOLOLayerKernel";
+ }
+ /** Constructor */
+ NEYOLOLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEYOLOLayerKernel(const NEYOLOLayerKernel &) = delete;
+ /** Default move constructor */
+ NEYOLOLayerKernel(NEYOLOLayerKernel &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEYOLOLayerKernel &operator=(const NEYOLOLayerKernel &) = delete;
+ /** Default move assignment operator */
+ NEYOLOLayerKernel &operator=(NEYOLOLayerKernel &&) = default;
+ /** Default destructor */
+ ~NEYOLOLayerKernel() = default;
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
+ *
+     * @param[in, out] input       Source tensor. If @p output is nullptr, this tensor will store the result
+ * of the activation function. Data types supported: F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] act_info Activation layer parameters.
+     * @param[in]      num_classes Number of classes to activate (must be a submultiple of the number of @p input channels)
+ */
+ void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayerKernel
+ *
+     * @param[in] input       Source tensor info. If @p output is nullptr, this tensor will store the result
+ * of the activation function. Data types supported: F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+     * @param[in] num_classes Number of classes to activate (must be a submultiple of the number of @p input channels)
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Function to run YOLO layer
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <typename T, int S>
+ void yolo_layer_nchw(const Window &window);
+ /** Function to run YOLO layer on tensors with NHWC format
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <typename T>
+ void yolo_layer_nhwc(const Window &window);
+ /** Common signature for all the yolo layer functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using YOLOFunctionPtr = void (NEYOLOLayerKernel::*)(const Window &window);
+
+private:
+ YOLOFunctionPtr _func;
+ ITensor *_input;
+ ITensor *_output;
+ ActivationLayerInfo _act_info;
+ int32_t _num_classes;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEYOLOLAYERKERNEL_H */
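// Illustrative sketch (not one of the patch hunks): configuring the YOLO kernel for
// in-place activation by passing nullptr as the output, as the @note above allows. The
// 30-channel layout (3 boxes x (5 classes + 5 box parameters)) is only an assumed,
// YOLOv3-style example shape.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEYOLOLayerKernel.h"

void yolo_kernel_sketch()
{
    using namespace arm_compute;

    Tensor feature_map{};
    feature_map.allocator()->init(TensorInfo(TensorShape(13U, 13U, 30U), 1, DataType::F32));

    NEYOLOLayerKernel kernel;
    kernel.configure(&feature_map, nullptr,
                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
                     5 /* num_classes */);
}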
#ifndef SRC_INEGEMMWRAPPERKERNEL_H
#define SRC_INEGEMMWRAPPERKERNEL_H
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
#ifndef SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
#define SRC_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/core/NEON/kernels/convolution/depthwise/depthwise.hpp"
#ifndef ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H
#define ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H
-#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_gemm_compute_iface.hpp"
+#include "src/core/NEON/INEKernel.h"
#include "gemm_common.hpp"
#include "arm_compute/core/IPyramid.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "src/core/NEON/kernels/assembly/arm_gemm.hpp"
#include "utils/TypePrinter.h"
#include <array>
#include "arm_compute/graph/nodes/Nodes.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
+#include "src/core/NEON/NEKernels.h"
#include "support/Cast.h"
#include "support/ToolchainSupport.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
+#include "src/core/NEON/kernels/NEReshapeLayerKernel.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "support/Cast.h"
using namespace arm_compute::utils::cast;
* SOFTWARE.
*/
#include "arm_compute/runtime/NEON/INEOperator.h"
+#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
namespace experimental
{
+INEOperator::~INEOperator() = default;
+
INEOperator::INEOperator(IRuntimeContext *ctx)
: _kernel(), _ctx(ctx), _workspace()
{
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
*/
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/CPP/ICPPKernel.h"
+#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+INESimpleFunction::~INESimpleFunction() = default;
INESimpleFunction::INESimpleFunction() // NOLINT
: _kernel(),
void INESimpleFunction::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}
+} //namespace arm_compute
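// Schematic of the ownership pattern this patch applies across the runtime functions (not
// an actual library class): kernels are no longer value members but are created in
// configure() through support::cpp14::make_unique, held in std::unique_ptr, and dispatched
// through raw pointers in run(). NEFillBorderKernel, NEScheduler and make_unique are real;
// ExampleFunction itself is hypothetical.
#include <memory>

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"

class ExampleFunction : public arm_compute::IFunction
{
public:
    void configure(arm_compute::ITensor *input, arm_compute::BorderSize border_size)
    {
        _border_handler = arm_compute::support::cpp14::make_unique<arm_compute::NEFillBorderKernel>();
        _border_handler->configure(input, border_size, arm_compute::BorderMode::CONSTANT, arm_compute::PixelValue());
    }

    void run() override
    {
        // Scheduling now takes the raw pointer out of the owning unique_ptr
        arm_compute::NEScheduler::get().schedule(_border_handler.get(), arm_compute::Window::DimZ);
    }

private:
    std::unique_ptr<arm_compute::NEFillBorderKernel> _border_handler{};
};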
*/
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/INEKernel.h"
#include "src/runtime/Utils.h"
namespace arm_compute
{
+INESimpleFunctionNoBorder::~INESimpleFunctionNoBorder() = default;
+
INESimpleFunctionNoBorder::INESimpleFunctionNoBorder(IRuntimeContext *ctx)
: _kernel(),
_ctx(ctx)
*/
#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
-#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
+#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEAbsoluteDifference::~NEAbsoluteDifference() = default;
void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
{
k->configure(input1, input2, output);
_kernel = std::move(k);
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
-#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h"
+#include "src/core/NEON/kernels/NEAccumulateKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEAccumulate::~NEAccumulate() = default;
void NEAccumulate::configure(const ITensor *input, ITensor *output)
{
_kernel = std::move(k);
}
+NEAccumulateWeighted::~NEAccumulateWeighted() = default;
+
void NEAccumulateWeighted::configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16)
{
if(use_fp16)
}
}
+NEAccumulateSquared::~NEAccumulateSquared() = default;
+
void NEAccumulateSquared::configure(const ITensor *input, uint32_t shift, ITensor *output)
{
auto k = arm_compute::support::cpp14::make_unique<NEAccumulateSquaredKernel>();
k->configure(input, shift, output);
_kernel = std::move(k);
}
+} // namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NEActivationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
namespace experimental
{
+NEActivationLayer::~NEActivationLayer() = default;
+
void NEActivationLayer::configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info)
{
auto k = arm_compute::support::cpp14::make_unique<NEActivationLayerKernel>();
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
+NEArgMinMaxLayer::~NEArgMinMaxLayer() = default;
+
NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _reduction_function(support::cpp14::make_unique<NEReductionOperation>())
{
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
+#include "src/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "support/MemorySupport.h"
#include <utility>
{
namespace experimental
{
+NEArithmeticAddition::~NEArithmeticAddition() = default;
+
void NEArithmeticAddition::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_UNUSED(act_info);
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
+#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEBatchNormalizationLayer::~NEBatchNormalizationLayer() = default;
NEBatchNormalizationLayer::NEBatchNormalizationLayer()
: _norm_kernel()
ActivationLayerInfo act_info)
{
// Configure kernel
- _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon, act_info);
+ _norm_kernel = arm_compute::support::cpp14::make_unique<NEBatchNormalizationLayerKernel>();
+ _norm_kernel->configure(input, output, mean, var, beta, gamma, epsilon, act_info);
}
Status NEBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma,
void NEBatchNormalizationLayer::run()
{
- NEScheduler::get().schedule(&_norm_kernel, Window::DimY);
+ NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
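// Why the patch adds out-of-line "= default" destructors such as the one for
// NEBatchNormalizationLayer above: kernels are now only forward-declared in the public
// headers and owned through std::unique_ptr, so the destructor has to be emitted in a
// translation unit where the kernel type is complete. A minimal schematic, with Widget and
// WidgetKernel as hypothetical names:
#include <memory>

namespace sketch
{
// widget.h (public header): the kernel type is incomplete here
class WidgetKernel;

class Widget
{
public:
    Widget();
    ~Widget(); // only declared; defaulting it here would require the complete WidgetKernel
private:
    std::unique_ptr<WidgetKernel> _kernel;
};

// widget.cpp (implementation file): the complete kernel definition is visible here
class WidgetKernel
{
    // stands in for the kernels that moved under src/core/NEON in this patch
};

Widget::Widget() : _kernel(nullptr)
{
}
Widget::~Widget() = default; // safe: unique_ptr's deleter sees the complete type
} // namespace sketch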
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
#include "support/MemorySupport.h"
*/
#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseAndKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseNotKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseOrKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h"
-#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h"
+#include "src/core/NEON/kernels/NEBitwiseXorKernel.h"
#include "support/MemorySupport.h"
#include <utility>
* SOFTWARE.
*/
#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h"
+#include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
#include "support/MemorySupport.h"
*/
#include "arm_compute/runtime/NEON/functions/NEBox3x3.h"
-#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEBox3x3Kernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
{
if(use_fp16)
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
}
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
#include "arm_compute/runtime/NEON/functions/NESobel7x7.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NECannyEdgeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESobel5x5Kernel.h"
+#include "src/core/NEON/kernels/NESobel7x7Kernel.h"
#include "support/MemorySupport.h"
#include <cstring>
#include <inttypes.h>
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NECannyEdge::~NECannyEdge() = default;
NECannyEdge::NECannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_memory_group.manage(&_nonmax);
// Configure non-maxima suppression
- _non_max_suppr.configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED);
+ _non_max_suppr = arm_compute::support::cpp14::make_unique<NEEdgeNonMaxSuppressionKernel>();
+ _non_max_suppr->configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED);
// Fill border around magnitude image as non-maxima suppression will access
// it. If border mode is undefined filling the border is a nop.
- _border_mag_gradient.configure(&_magnitude, _non_max_suppr.border_size(), border_mode, constant_border_value);
+ _border_mag_gradient = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_mag_gradient->configure(&_magnitude, _non_max_suppr->border_size(), border_mode, constant_border_value);
// Allocate intermediate tensors
_phase.allocator()->allocate();
_magnitude.allocator()->allocate();
// Configure edge tracing
- _edge_trace.configure(&_nonmax, output);
+ _edge_trace = arm_compute::support::cpp14::make_unique<NEEdgeTraceKernel>();
+ _edge_trace->configure(&_nonmax, output);
// Fill border with "No edge" to stop recursion in edge trace
- _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, static_cast<float>(0.f));
+ _border_edge_trace = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_edge_trace->configure(&_nonmax, _edge_trace->border_size(), BorderMode::CONSTANT, static_cast<float>(0.f));
// Allocate intermediate tensors
_nonmax.allocator()->allocate();
NEScheduler::get().schedule(_gradient.get(), Window::DimY);
// Fill border before non-maxima suppression. Nop for border mode undefined.
- NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ);
+ NEScheduler::get().schedule(_border_mag_gradient.get(), Window::DimZ);
// Run non-maxima suppression
- NEScheduler::get().schedule(&_non_max_suppr, Window::DimY);
+ NEScheduler::get().schedule(_non_max_suppr.get(), Window::DimY);
ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
// Fill border before edge trace
- NEScheduler::get().schedule(&_border_edge_trace, Window::DimZ);
+ NEScheduler::get().schedule(_border_edge_trace.get(), Window::DimZ);
// Run edge tracing
- NEScheduler::get().schedule(&_edge_trace, Window::DimY);
+ NEScheduler::get().schedule(_edge_trace.get(), Window::DimY);
}
+} // namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NECast.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
+#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h"
-#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h"
+#include "src/core/NEON/kernels/NEChannelCombineKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h"
-#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h"
+#include "src/core/NEON/kernels/NEChannelExtractKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h"
-#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NECol2Im.h"
-#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/NEON/kernels/NECol2ImKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEColorConvert.h"
-#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h"
+#include "src/core/NEON/kernels/NEColorConvertKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h"
+#include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
-#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.h"
+#include "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* SOFTWARE.
*/
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEConvertFullyConnectedWeights::~NEConvertFullyConnectedWeights() = default;
+
NEConvertFullyConnectedWeights::NEConvertFullyConnectedWeights()
: _kernel()
{
void NEConvertFullyConnectedWeights::configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape,
DataLayout data_layout)
{
- _kernel.configure(input, output, original_input_shape, data_layout);
+ _kernel = arm_compute::support::cpp14::make_unique<NEConvertFullyConnectedWeightsKernel>();
+ _kernel->configure(input, output, original_input_shape, data_layout);
}
Status NEConvertFullyConnectedWeights::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape,
void NEConvertFullyConnectedWeights::run()
{
- NEScheduler::get().schedule(&_kernel, Window::DimZ);
+ NEScheduler::get().schedule(_kernel.get(), Window::DimZ);
}
} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEConvolutionKernel.h"
+#include "src/core/NEON/kernels/NEConvolutionKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <array>
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEConvolution3x3::~NEConvolution3x3() = default;
void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEConvolution3x3Kernel>();
k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+template <unsigned int matrix_size>
+NEConvolutionSquare<matrix_size>::~NEConvolutionSquare() = default;
+
template <unsigned int matrix_size>
NEConvolutionSquare<matrix_size>::NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
_is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size);
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
if(_is_separable)
{
DataType intermediate_type = DataType::UNKNOWN;
scale = calculate_matrix_scale(conv, matrix_size);
}
- _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
+ _kernel_hor = arm_compute::support::cpp14::make_unique<NESeparableConvolutionHorKernel<matrix_size>>();
+ _kernel_vert = arm_compute::support::cpp14::make_unique<NESeparableConvolutionVertKernel<matrix_size>>();
+
+ _kernel_hor->configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
+ _kernel_vert->configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
_tmp.allocator()->allocate();
- _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ b->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
else
{
- _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
+ _kernel = arm_compute::support::cpp14::make_unique<NEConvolutionKernel<matrix_size>>();
+ _kernel->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
+ _border_handler = std::move(b);
}
template <unsigned int matrix_size>
void NEConvolutionSquare<matrix_size>::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
if(_is_separable)
{
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_kernel_hor, Window::DimY);
- NEScheduler::get().schedule(&_kernel_vert, Window::DimY);
+ NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY);
+ NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY);
}
else
{
- NEScheduler::get().schedule(&_kernel, Window::DimY);
+ NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}
}
template class arm_compute::NEConvolutionSquare<7>;
template class arm_compute::NEConvolutionSquare<9>;
+NEConvolutionRectangle::~NEConvolutionRectangle() = default;
+
void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEConvolutionRectangleKernel>();
k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "support/MemorySupport.h"
#include <cmath>
*/
#include "arm_compute/runtime/NEON/functions/NECopy.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
#include "support/MemorySupport.h"
#include <utility>
namespace arm_compute
{
+NECopy::~NECopy() = default;
+
void NECopy::configure(ITensor *input, ITensor *output)
{
auto k = arm_compute::support::cpp14::make_unique<NECopyKernel>();
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NECropResize.h"
+#include "src/core/NEON/kernels/NECropKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
+NECropResize::~NECropResize() = default;
+
NECropResize::NECropResize()
: _output(nullptr), _num_boxes(0), _method(), _extrapolation_value(0), _crop(), _scale(), _crop_results(), _scaled_results()
{
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute::misc::shape_calculator;
*/
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
#include "support/MemorySupport.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;
}
} // namespace
+NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;
+
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(),
- _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false),
- _is_activationlayer_enabled(false), _is_prepared(false)
+ : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _accumulator(), _permuted_input(),
+ _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}
}
_original_weights = weights_to_use;
- _depthwise_conv_kernel.configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
+ _depthwise_conv_kernel = arm_compute::support::cpp14::make_unique<NEDepthwiseConvolutionLayerNativeKernel>();
+ _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
if(_is_nchw)
{
_permute_input.run();
}
- NEScheduler::get().schedule(&_depthwise_conv_kernel, Window::DimY);
+ NEScheduler::get().schedule(_depthwise_conv_kernel.get(), Window::DimY);
if(_is_nchw)
{
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
-#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
+#include "src/core/NEON/kernels/NEDequantizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEDerivative::~NEDerivative() = default;
NEDerivative::NEDerivative()
: _kernel(), _border_handler()
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
- _kernel.configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _kernel = arm_compute::support::cpp14::make_unique<NEDerivativeKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+
+ _kernel->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _border_handler->configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
void NEDerivative::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
- NEScheduler::get().schedule(&_kernel, Window::DimY);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
+ NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEDilate.h"
-#include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEDilateKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<NEDilateKernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEDirectConvolutionLayer::~NEDirectConvolutionLayer() = default;
+
NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false),
_is_activationlayer_enabled(false), _dim_split(Window::DimZ), _is_padding_required()
void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN);
+ _output_stage_kernel = arm_compute::support::cpp14::make_unique<NEDirectConvolutionLayerOutputStageKernel>();
+ _conv_kernel = arm_compute::support::cpp14::make_unique<NEDirectConvolutionLayerKernel>();
+ _input_border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
// Free accumulator
if(_accumulator.buffer() != nullptr)
// Check if bias should be added in the convolution result
_has_bias = (bias != nullptr);
- _conv_kernel.configure(input, weights, output, conv_info);
+ _conv_kernel->configure(input, weights, output, conv_info);
if(_has_bias)
{
- _output_stage_kernel.configure(output, bias);
+ _output_stage_kernel->configure(output, bias);
}
- _is_padding_required = !_conv_kernel.border_size().empty();
+ _is_padding_required = !_conv_kernel->border_size().empty();
if(_is_padding_required)
{
// Add zero padding XY
- _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
+ _input_border_handler->configure(input, _conv_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
}
//Configure Activation Layer
if(_is_padding_required)
{
- NEScheduler::get().schedule(&_input_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_input_border_handler.get(), Window::DimZ);
}
- NEScheduler::get().schedule(&_conv_kernel, _dim_split);
+ NEScheduler::get().schedule(_conv_kernel.get(), _dim_split);
if(_has_bias)
{
- NEScheduler::get().schedule(&_output_stage_kernel, Window::DimY);
+ NEScheduler::get().schedule(_output_stage_kernel.get(), Window::DimY);
}
if(_is_activationlayer_enabled)
*/
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
-#include <arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h>
+#include <src/core/NEON/kernels/NEElementwiseOperationKernel.h>
#include "arm_compute/core/ITensor.h"
#include "support/MemorySupport.h"
*/
#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h"
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h"
+#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
#include "support/MemorySupport.h"
#include <utility>
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h"
+#include "src/core/NEON/kernels/NEHistogramKernel.h"
+#include "src/core/NEON/kernels/NEHistogramKernel.h"
+#include "src/core/NEON/kernels/NETableLookupKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEEqualizeHistogram::~NEEqualizeHistogram() = default;
NEEqualizeHistogram::NEEqualizeHistogram()
: _histogram_kernel(), _cd_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8)
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ _histogram_kernel = arm_compute::support::cpp14::make_unique<NEHistogramKernel>();
+ _cd_histogram_kernel = arm_compute::support::cpp14::make_unique<NECumulativeDistributionKernel>();
+ _map_histogram_kernel = arm_compute::support::cpp14::make_unique<NETableLookupKernel>();
+
// Configure kernels
- _histogram_kernel.configure(input, &_hist);
- _cd_histogram_kernel.configure(input, &_hist, &_cum_dist, &_cd_lut);
- _map_histogram_kernel.configure(input, &_cd_lut, output);
+ _histogram_kernel->configure(input, &_hist);
+ _cd_histogram_kernel->configure(input, &_hist, &_cum_dist, &_cd_lut);
+ _map_histogram_kernel->configure(input, &_cd_lut, output);
}
void NEEqualizeHistogram::run()
{
// Calculate histogram of input.
- NEScheduler::get().schedule(&_histogram_kernel, Window::DimY);
+ NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY);
// Calculate cumulative distribution of histogram and create LUT.
- NEScheduler::get().schedule(&_cd_histogram_kernel, Window::DimY);
+ NEScheduler::get().schedule(_cd_histogram_kernel.get(), Window::DimY);
// Map input to output using created LUT.
- NEScheduler::get().schedule(&_map_histogram_kernel, Window::DimY);
+ NEScheduler::get().schedule(_map_histogram_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEErode.h"
-#include "arm_compute/core/NEON/kernels/NEErodeKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEErodeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEErode::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEErodeKernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
#include "src/core/utils/helpers/fft.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEFFT1D::~NEFFT1D() = default;
+
NEFFT1D::NEFFT1D(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _digit_reverse_kernel(), _fft_kernels(), _scale_kernel(), _digit_reversed_input(), _digit_reverse_indices(), _num_ffts(0), _axis(0), _run_scale(false)
{
TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32);
_digit_reverse_indices.allocator()->init(digit_reverse_indices_info);
_memory_group.manage(&_digit_reversed_input);
- _digit_reverse_kernel.configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
+ _digit_reverse_kernel = arm_compute::support::cpp14::make_unique<NEFFTDigitReverseKernel>();
+ _digit_reverse_kernel->configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
// Create and configure FFT kernels
unsigned int Nx = 1;
fft_kernel_info.radix = radix_for_stage;
fft_kernel_info.Nx = Nx;
fft_kernel_info.is_first_stage = (i == 0);
- _fft_kernels[i].configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
+ _fft_kernels[i] = arm_compute::support::cpp14::make_unique<NEFFTRadixStageKernel>();
+ _fft_kernels[i]->configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
Nx *= radix_for_stage;
}
FFTScaleKernelInfo scale_config;
scale_config.scale = static_cast<float>(N);
scale_config.conjugate = config.direction == FFTDirection::Inverse;
- is_c2r ? _scale_kernel.configure(&_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config);
+ _scale_kernel = arm_compute::support::cpp14::make_unique<NEFFTScaleKernel>();
+ is_c2r ? _scale_kernel->configure(&_digit_reversed_input, output, scale_config) : _scale_kernel->configure(output, nullptr, scale_config);
}
// Allocate tensors
{
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_digit_reverse_kernel, (_axis == 0 ? Window::DimY : Window::DimZ));
+ NEScheduler::get().schedule(_digit_reverse_kernel.get(), (_axis == 0 ? Window::DimY : Window::DimZ));
for(unsigned int i = 0; i < _num_ffts; ++i)
{
- NEScheduler::get().schedule(&_fft_kernels[i], (_axis == 0 ? Window::DimY : Window::DimX));
+ NEScheduler::get().schedule(_fft_kernels[i].get(), (_axis == 0 ? Window::DimY : Window::DimX));
}
// Run output scaling
if(_run_scale)
{
- NEScheduler::get().schedule(&_scale_kernel, Window::DimY);
+ NEScheduler::get().schedule(_scale_kernel.get(), Window::DimY);
}
}
} // namespace arm_compute
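// Illustrative sketch only, not part of the patch: NEFFT1D above keeps one radix-stage kernel
// per decomposition step, so with heap-allocated kernels the natural container is a vector of
// std::unique_ptr filled in configure() and walked in run(). The stub types and radix values
// below (StageKernelStub, FFT1DStub) are hypothetical stand-ins used only for illustration.
#include <cstddef>
#include <memory>
#include <vector>

struct StageKernelStub
{
    void configure(unsigned int radix, bool first_stage) { _radix = radix; _first = first_stage; }
    void run() {}
    unsigned int _radix = 0;
    bool         _first = false;
};

struct FFT1DStub
{
    void configure(const std::vector<unsigned int> &stage_radices)
    {
        _stages.clear();
        for(std::size_t i = 0; i < stage_radices.size(); ++i)
        {
            auto k = std::make_unique<StageKernelStub>(); // one kernel per FFT stage
            k->configure(stage_radices[i], i == 0);       // only the first stage reads the digit-reversed input
            _stages.push_back(std::move(k));
        }
    }
    void run()
    {
        for(auto &stage : _stages)
        {
            stage->run(); // stages execute in configuration order
        }
    }
    std::vector<std::unique_ptr<StageKernelStub>> _stages;
};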
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/Scheduler.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
namespace arm_compute
{
+NEFFT2D::~NEFFT2D() = default;
+
NEFFT2D::NEFFT2D(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager), _first_pass_func(memory_manager), _second_pass_func(memory_manager), _first_pass_tensor()
{
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
+#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
+#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/utils/helpers/fft.h"
_is_prepared(false)
{
}
+NEFFTConvolutionLayer::~NEFFTConvolutionLayer() = default;
void NEFFTConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info)
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFastCornersKernel.h"
+#include "src/core/NEON/kernels/NEFillArrayKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEFastCorners::~NEFastCorners() = default;
NEFastCorners::NEFastCorners(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
_output.allocator()->init(tensor_info);
_memory_group.manage(&_output);
+ _fast_corners_kernel = arm_compute::support::cpp14::make_unique<NEFastCornersKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _fill_kernel = arm_compute::support::cpp14::make_unique<NEFillArrayKernel>();
// If border is UNDEFINED _fast_corners_kernel will operate in xwindow (3,
// width - 3) and ywindow (3, height -3) so the output image will leave the
// pixels on the borders unchanged. This is reflected in the valid region
// of the output. The non maxima suppression is only run on the valid
// pixels.
- _fast_corners_kernel.configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode);
- _border_handler.configure(input, _fast_corners_kernel.border_size(), border_mode, constant_border_value);
+ _fast_corners_kernel->configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode);
+ _border_handler->configure(input, _fast_corners_kernel->border_size(), border_mode, constant_border_value);
if(!_non_max)
{
- _fill_kernel.configure(&_output, 1 /* we keep all texels >0 */, corners);
+ _fill_kernel->configure(&_output, 1 /* we keep all texels >0 */, corners);
}
else
{
_suppressed.allocator()->init(tensor_info);
_memory_group.manage(&_suppressed);
- _nonmax_kernel.configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode);
- _fill_kernel.configure(&_suppressed, 1 /* we keep all texels >0 */, corners);
+ _nonmax_kernel = arm_compute::support::cpp14::make_unique<NENonMaximaSuppression3x3Kernel>();
+ _nonmax_kernel->configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode);
+ _fill_kernel->configure(&_suppressed, 1 /* we keep all texels >0 */, corners);
// Allocate intermediate tensors
_suppressed.allocator()->allocate();
void NEFastCorners::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_fast_corners_kernel, Window::DimY);
+ NEScheduler::get().schedule(_fast_corners_kernel.get(), Window::DimY);
if(_non_max)
{
- NEScheduler::get().schedule(&_nonmax_kernel, Window::DimY);
+ NEScheduler::get().schedule(_nonmax_kernel.get(), Window::DimY);
}
- NEScheduler::get().schedule(&_fill_kernel, Window::DimY);
+ NEScheduler::get().schedule(_fill_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEMemsetKernel.h"
#include "support/MemorySupport.h"
#include <utility>
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
void NEFillBorder::configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
{
- _border_handler.configure(input, BorderSize(border_width), border_mode, constant_border_value);
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_handler->configure(input, BorderSize(border_width), border_mode, constant_border_value);
}
void NEFillBorder::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
}
} // namespace arm_compute
\ No newline at end of file
*/
#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "arm_compute/core/Size2D.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEFloor.h"
-#include "arm_compute/core/NEON/kernels/NEFloorKernel.h"
+#include "src/core/NEON/kernels/NEFloorKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
#include "support/MemorySupport.h"
return NETransposeKernel::validate(input, output);
}
+NEFullyConnectedLayer::~NEFullyConnectedLayer() = default;
+
NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten_kernel(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
_reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(),
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
- _flatten_kernel.configure(input, &_flatten_output);
+
+ _flatten_kernel = arm_compute::support::cpp14::make_unique<NEFlattenLayerKernel>();
+ _flatten_kernel->configure(input, &_flatten_output);
// Configure matrix multiply kernel
configure_mm(&_flatten_output, weights, biases, output, act);
// Linearize input if it comes from a convolutional layer
if(_is_fc_after_conv)
{
- NEScheduler::get().schedule(&_flatten_kernel, Window::DimY);
+ NEScheduler::get().schedule(_flatten_kernel.get(), Window::DimY);
}
// Run matrix multiply
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEFuseBatchNormalization::~NEFuseBatchNormalization() = default;
+
NEFuseBatchNormalization::NEFuseBatchNormalization()
: _fuse_bn_kernel()
{
const ITensor *input_bias, const ITensor *bn_beta, const ITensor *bn_gamma,
float epsilon, FuseBatchNormalizationType fbn_type)
{
- _fuse_bn_kernel.configure(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
+ _fuse_bn_kernel = arm_compute::support::cpp14::make_unique<NEFuseBatchNormalizationKernel>();
+ _fuse_bn_kernel->configure(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
}
Status NEFuseBatchNormalization::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
void NEFuseBatchNormalization::run()
{
- NEScheduler::get().schedule(&_fuse_bn_kernel, Window::DimY);
+ NEScheduler::get().schedule(_fuse_bn_kernel.get(), Window::DimY);
}
} // namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "support/MemorySupport.h"
#include <cmath>
namespace arm_compute
{
+NEGEMM::~NEGEMM() = default;
+
NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(),
_alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false),
_memory_group.manage(&_tmp_d);
}
+ _mm_kernel = arm_compute::support::cpp14::make_unique<NEGEMMMatrixMultiplyKernel>();
+
// Select between GEMV and GEMM
if(_run_vector_matrix_multiplication)
{
// Configure the matrix multiply kernel
- _mm_kernel.configure(a, b, gemm_output_to_use, alpha, false);
+ _mm_kernel->configure(a, b, gemm_output_to_use, alpha, false);
}
else
{
int k = a->info()->dimension(0);
// Configure interleave kernel
- _interleave_kernel.configure(a, &_tmp_a);
+ _interleave_kernel = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
+ _interleave_kernel->configure(a, &_tmp_a);
// Configure transpose kernel
- _transpose_kernel.configure(b, &_tmp_b);
+ _transpose_kernel = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
+ _transpose_kernel->configure(b, &_tmp_b);
// Configure matrix multiplication kernel
- _mm_kernel.configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k));
+ _mm_kernel->configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k));
// Allocate once the all configure methods have been called
_tmp_a.allocator()->allocate();
// Configure matrix addition kernel
if(_run_addition)
{
- _ma_kernel.configure(c, d, beta);
+ _ma_kernel = arm_compute::support::cpp14::make_unique<NEGEMMMatrixAdditionKernel>();
+ _ma_kernel->configure(c, d, beta);
}
// Configure activation
if(!_run_vector_matrix_multiplication)
{
// Run interleave kernel
- NEScheduler::get().schedule(&_interleave_kernel, Window::DimY);
+ NEScheduler::get().schedule(_interleave_kernel.get(), Window::DimY);
if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
- NEScheduler::get().schedule(&_transpose_kernel, Window::DimY);
+ NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY);
}
}
- NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY);
+ NEScheduler::get().schedule(_mm_kernel.get(), _run_vector_matrix_multiplication ? Window::DimX : Window::DimY);
// Run bias addition kernel
if(_run_bias_addition)
// Run matrix addition kernel
if(_run_addition)
{
- NEScheduler::get().schedule(&_ma_kernel, Window::DimY);
+ NEScheduler::get().schedule(_ma_kernel.get(), Window::DimY);
}
// Run activation function
}
_tmp_b.allocator()->allocate();
- NEScheduler::get().schedule(&_transpose_kernel, Window::DimY);
+ NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY);
if(!original_b_managed_by_weights_manager)
{
_original_b->mark_as_unused();
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NECol2ImKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "support/MemorySupport.h"
+
#include <set>
#include <tuple>
{
using namespace arm_compute::misc::shape_calculator;
+NEConvolutionLayerReshapeWeights::~NEConvolutionLayerReshapeWeights() = default;
NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights()
: _weights_reshape_kernel()
{
const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
const ITensor *biases_to_use = (append_biases) ? biases : nullptr;
- _weights_reshape_kernel.configure(weights, biases_to_use, output);
+ _weights_reshape_kernel = arm_compute::support::cpp14::make_unique<NEWeightsReshapeKernel>();
+ _weights_reshape_kernel->configure(weights, biases_to_use, output);
output->info()->set_quantization_info(weights->info()->quantization_info());
}
void NEConvolutionLayerReshapeWeights::run()
{
- NEScheduler::get().schedule(&_weights_reshape_kernel, 3);
+ NEScheduler::get().schedule(_weights_reshape_kernel.get(), 3);
}
+NEGEMMConvolutionLayer::~NEGEMMConvolutionLayer() = default;
+
NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager),
_col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _skip_im2col(false),
_memory_group.manage(&_im2col_output);
// Configure
- _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, false, dilation);
+ _im2col_kernel = arm_compute::support::cpp14::make_unique<NEIm2ColKernel>();
+ _im2col_kernel->configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, false, dilation);
// Update GEMM input
gemm_input_to_use = &_im2col_output;
if(_data_layout == DataLayout::NCHW)
{
// Configure col2im
- _col2im_kernel.configure(gemm_output_to_use, output, Size2D(conv_w, conv_h));
+ _col2im_kernel = arm_compute::support::cpp14::make_unique<NECol2ImKernel>();
+ _col2im_kernel->configure(gemm_output_to_use, output, Size2D(conv_w, conv_h));
}
else
{
{
// Run input reshaping
unsigned int y_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
- NEScheduler::get().schedule(&_im2col_kernel, y_dim);
+ NEScheduler::get().schedule(_im2col_kernel.get(), y_dim);
}
// Runs NEGEMM or NEGEMMLowpMatrixMultiplyCore functions
{
if(_data_layout == DataLayout::NCHW)
{
- NEScheduler::get().schedule(&_col2im_kernel, Window::DimY);
+ NEScheduler::get().schedule(_col2im_kernel.get(), Window::DimY);
}
else
{
*/
#include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEGEMMLowpAssemblyMatrixMultiplyCore::~NEGEMMLowpAssemblyMatrixMultiplyCore() = default;
NEGEMMLowpAssemblyMatrixMultiplyCore::NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr), _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _tmp_a(), _tmp_b()
NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY);
}
}
+} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/helpers/AutoConfiguration.h"
+
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+
#include "support/MemorySupport.h"
namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
+NEGEMMLowpMatrixMultiplyCore::~NEGEMMLowpMatrixMultiplyCore() = default;
+
NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(memory_manager, weights_manager), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(),
_mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), _convert_to_signed_asymm(),
_signed_a.allocator()->init(a_to_use->info()->clone()->set_data_type(dt).set_quantization_info(QuantizationInfo(iqinfo.scale, iqinfo.offset + offset_correction)));
_memory_group.manage(&_signed_a);
- _convert_to_signed_asymm.configure(a_to_use, &_signed_a);
+ _convert_to_signed_asymm = arm_compute::support::cpp14::make_unique<NEConvertQuantizedSignednessKernel>();
+ _convert_to_signed_asymm->configure(a_to_use, &_signed_a);
a_to_use = &_signed_a;
_a_offset = _signed_a.info()->quantization_info().uniform().offset;
}
// Configure interleave kernel
- _mtx_a_reshape_kernel.configure(a_to_use, &_tmp_a);
+ _mtx_a_reshape_kernel = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
+ _mtx_a_reshape_kernel->configure(a_to_use, &_tmp_a);
// Configure transpose kernel
- _mtx_b_reshape_kernel.configure(b, &_tmp_b);
+ _mtx_b_reshape_kernel = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
+ _mtx_b_reshape_kernel->configure(b, &_tmp_b);
}
if(!_fused_assembly_path)
}
// Configure Matrix B reduction kernel
- _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, reduction_info);
+ _mtx_b_reduction_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixBReductionKernel>();
+ _mtx_b_reduction_kernel->configure(b, &_vector_sum_col, reduction_info);
}
// Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
_memory_group.manage(&_vector_sum_row);
// Configure matrix A reduction kernel
- _mtx_a_reduction_kernel.configure(a_to_use, &_vector_sum_row, reduction_info);
+ _mtx_a_reduction_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _mtx_a_reduction_kernel->configure(a_to_use, &_vector_sum_row, reduction_info);
}
if(_fuse_output_stage)
// Configure matrix multiply kernel
if(!_assembly_path)
{
- _mm_kernel.configure(matrix_a, matrix_b, &_mm_result_s32);
+ _mm_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
+ _mm_kernel->configure(matrix_a, matrix_b, &_mm_result_s32);
}
- _offset_contribution_output_stage_kernel.configure(&_mm_result_s32,
- _a_offset == 0 ? nullptr : &_vector_sum_col,
- _b_offset == 0 ? nullptr : &_vector_sum_row, c,
- _flip_signedness ? &_signed_output : output,
- a->info()->dimension(0),
- _a_offset, _b_offset, info.gemmlowp_output_stage());
+ _offset_contribution_output_stage_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpOffsetContributionOutputStageKernel>();
+ _offset_contribution_output_stage_kernel->configure(&_mm_result_s32,
+ _a_offset == 0 ? nullptr : &_vector_sum_col,
+ _b_offset == 0 ? nullptr : &_vector_sum_row, c,
+ _flip_signedness ? &_signed_output : output,
+ a->info()->dimension(0),
+ _a_offset, _b_offset, info.gemmlowp_output_stage());
if(_flip_signedness)
{
- _convert_from_signed_asymm.configure(&_signed_output, output);
+ _convert_from_signed_asymm = arm_compute::support::cpp14::make_unique<NEConvertQuantizedSignednessKernel>();
+ _convert_from_signed_asymm->configure(&_signed_output, output);
}
}
else
// Configure matrix multiply kernel
if(!_assembly_path)
{
- _mm_kernel.configure(matrix_a, matrix_b, output);
+ _mm_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
+ _mm_kernel->configure(matrix_a, matrix_b, output);
}
// Configure offset contribution kernel
- _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, a_to_use->info()->dimension(0), _a_offset, _b_offset);
+ _offset_contribution_kernel = arm_compute::support::cpp14::make_unique<NEGEMMLowpOffsetContributionKernel>();
+ _offset_contribution_kernel->configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, a_to_use->info()->dimension(0), _a_offset, _b_offset);
}
// Configure activation
// Convert QASYMM8->QASYMM8_SIGNED
if(_flip_signedness)
{
- NEScheduler::get().schedule(&_convert_to_signed_asymm, Window::DimY);
+ NEScheduler::get().schedule(_convert_to_signed_asymm.get(), Window::DimY);
}
// Run GEMM
if(!_run_vector_matrix_multiplication)
{
// Run interleave kernel
- NEScheduler::get().schedule(&_mtx_a_reshape_kernel, Window::DimY);
+ NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY);
if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
- NEScheduler::get().schedule(&_mtx_b_reshape_kernel, Window::DimY);
+ NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
}
}
- NEScheduler::get().schedule(&_mm_kernel, Window::DimY);
+ NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY);
}
if(!_fused_assembly_path)
// Run matrix A reduction kernel only if _b_offset is not equal to 0
if(_b_offset != 0)
{
- NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX);
+ NEScheduler::get().schedule(_mtx_a_reduction_kernel.get(), Window::DimX);
}
// Run matrix B reduction kernel only if _a_offset is not equal to 0
if(_a_offset != 0 && !_reshape_b_only_on_first_run)
{
- NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
+ NEScheduler::get().schedule(_mtx_b_reduction_kernel.get(), Window::DimX);
}
if(_fuse_output_stage)
{
// Run offset contribution kernel
- NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY);
+ NEScheduler::get().schedule(_offset_contribution_output_stage_kernel.get(), Window::DimY);
}
else
{
// Run offset contribution kernel
- NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY);
+ NEScheduler::get().schedule(_offset_contribution_kernel.get(), Window::DimY);
}
}
// Convert QASYMM8_SIGNED->QASYMM8
- if(_flip_signedness)
+ if(!_fused_assembly_path && _fuse_output_stage && _flip_signedness)
{
- NEScheduler::get().schedule(&_convert_from_signed_asymm, Window::DimY);
+ NEScheduler::get().schedule(_convert_from_signed_asymm.get(), Window::DimY);
}
// Run fused activation unless already run in the fused assembly
// Run reshape kernel and mark original weights tensor as unused
_tmp_b.allocator()->allocate();
- NEScheduler::get().schedule(&_mtx_b_reshape_kernel, Window::DimY);
+ NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
if(!original_b_managed_by_weights_manager)
{
_original_b->mark_as_unused();
if(!_fused_assembly_path && _a_offset != 0 && _reshape_b_only_on_first_run)
{
_vector_sum_col.allocator()->allocate();
- NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
+ NEScheduler::get().schedule(_mtx_b_reduction_kernel.get(), Window::DimX);
}
_is_prepared = true;
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
+NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default;
+
void NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift,
int result_offset_after_shift, int min, int max)
{
return NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::validate(input, bias, output, min, max);
}
+NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default;
+
void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift,
int result_offset_after_shift, int min, int max)
{
return NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(input, bias, output, min, max);
}
+NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default;
+
void NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min, int max)
{
auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel>();
return NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::validate(input, bias, output, min, max);
}
+NEGEMMLowpOutputStage::~NEGEMMLowpOutputStage() = default;
+
void NEGEMMLowpOutputStage::configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo &info)
{
// Perform validate step
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEGather.h"
-#include "arm_compute/core/NEON/kernels/NEGatherKernel.h"
+#include "src/core/NEON/kernels/NEGatherKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEGaussian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEGaussian3x3Kernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEGaussian5x5::~NEGaussian5x5() = default;
NEGaussian5x5::NEGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _tmp(), _border_handler()
// Manage intermediate buffers
_memory_group.manage(&_tmp);
+ _kernel_hor = arm_compute::support::cpp14::make_unique<NEGaussian5x5HorKernel>();
+ _kernel_vert = arm_compute::support::cpp14::make_unique<NEGaussian5x5VertKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+
// Create and configure kernels for the two passes
- _kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED);
+ _kernel_hor->configure(input, &_tmp, border_mode == BorderMode::UNDEFINED);
+ _kernel_vert->configure(&_tmp, output, border_mode == BorderMode::UNDEFINED);
_tmp.allocator()->allocate();
- _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void NEGaussian5x5::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_kernel_hor, Window::DimY);
- NEScheduler::get().schedule(&_kernel_vert, Window::DimY);
+ NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY);
+ NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY);
}
+} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
-#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
#include "arm_compute/runtime/Pyramid.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h"
+#include "src/core/NEON/kernels/NEScaleKernel.h"
+#include "support/MemorySupport.h"
#include <cstddef>
{
}
+NEGaussianPyramidHalf::~NEGaussianPyramidHalf() = default;
+
NEGaussianPyramidHalf::NEGaussianPyramidHalf() // NOLINT
: _horizontal_border_handler(),
_vertical_border_handler(),
for(size_t i = 0; i < num_stages; ++i)
{
/* Configure horizontal kernel */
- _horizontal_reduction[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
+ _horizontal_reduction[i] = arm_compute::support::cpp14::make_unique<NEGaussianPyramidHorKernel>();
+ _horizontal_reduction[i]->configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
/* Configure vertical kernel */
- _vertical_reduction[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
+ _vertical_reduction[i] = arm_compute::support::cpp14::make_unique<NEGaussianPyramidVertKernel>();
+ _vertical_reduction[i]->configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
/* Configure border */
- _horizontal_border_handler[i].configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value));
+ _horizontal_border_handler[i] = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _horizontal_border_handler[i]->configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i]->border_size(), border_mode, PixelValue(constant_border_value));
/* Configure border */
- _vertical_border_handler[i].configure(_tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16));
+ _vertical_border_handler[i] = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _vertical_border_handler[i]->configure(_tmp.get_pyramid_level(i), _vertical_reduction[i]->border_size(), border_mode, PixelValue(pixel_value_u16));
}
_tmp.allocate();
for(unsigned int i = 0; i < num_levels - 1; ++i)
{
- NEScheduler::get().schedule(&_horizontal_border_handler[i], Window::DimZ);
- NEScheduler::get().schedule(&_horizontal_reduction[i], Window::DimY);
- NEScheduler::get().schedule(&_vertical_border_handler[i], Window::DimZ);
- NEScheduler::get().schedule(&_vertical_reduction[i], Window::DimY);
+ NEScheduler::get().schedule(_horizontal_border_handler[i].get(), Window::DimZ);
+ NEScheduler::get().schedule(_horizontal_reduction[i].get(), Window::DimY);
+ NEScheduler::get().schedule(_vertical_border_handler[i].get(), Window::DimZ);
+ NEScheduler::get().schedule(_vertical_reduction[i].get(), Window::DimY);
}
}
+NEGaussianPyramidOrb::~NEGaussianPyramidOrb() = default;
+
NEGaussianPyramidOrb::NEGaussianPyramidOrb() // NOLINT
: _gaus5x5(),
_scale_nearest()
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager),
- _permute_deltas_kernel(),
+ _permute_deltas(),
_flatten_deltas(),
- _permute_scores_kernel(),
+ _permute_scores(),
_flatten_scores(),
- _compute_anchors_kernel(),
- _bounding_box_kernel(),
- _pad_kernel(),
+ _compute_anchors(),
+ _bounding_box(),
+ _pad(),
_dequantize_anchors(),
_dequantize_deltas(),
_quantize_all_proposals(),
{
}
+NEGenerateProposalsLayer::~NEGenerateProposalsLayer() = default;
+
void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals,
const GenerateProposalsInfo &info)
{
// Compute all the anchors
_memory_group.manage(&_all_anchors);
- _compute_anchors_kernel.configure(anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
+ _compute_anchors.configure(anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
const TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors);
_deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info()));
if(!_is_nhwc)
{
_memory_group.manage(&_deltas_permuted);
- _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
+ _permute_deltas.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
_flatten_deltas.configure(&_deltas_permuted, &_deltas_flattened);
_deltas_permuted.allocator()->allocate();
}
if(!_is_nhwc)
{
_memory_group.manage(&_scores_permuted);
- _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
+ _permute_scores.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
_flatten_scores.configure(&_scores_permuted, &_scores_flattened);
_scores_permuted.allocator()->allocate();
}
// Bounding box transform
_memory_group.manage(&_all_proposals);
BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f);
- _bounding_box_kernel.configure(anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
+ _bounding_box.configure(anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
deltas_to_use->allocator()->allocate();
anchors_to_use->allocator()->allocate();
_scores_flattened.allocator()->allocate();
// Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images
- _pad_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
+ _pad.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
_proposals_4_roi_values.allocator()->allocate();
}
}
TensorInfo all_anchors_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
- ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchorsKernel::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchors::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())));
TensorInfo deltas_permuted_info = deltas->clone()->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(true);
TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true);
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 }));
- ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 }));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 }));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 }));
}
TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
if(is_qasymm8)
{
TensorInfo all_anchors_f32_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
- ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayerKernel::validate(&all_anchors_info, &all_anchors_f32_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&all_anchors_info, &all_anchors_f32_info));
TensorInfo deltas_flattened_f32_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
- ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayerKernel::validate(&deltas_flattened_info, &deltas_flattened_f32_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&deltas_flattened_info, &deltas_flattened_f32_info));
TensorInfo proposals_4_roi_values_f32(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32));
- ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info,
- BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info,
+ BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
- ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayerKernel::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayer::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized));
proposals_4_roi_values_to_use = &proposals_4_roi_values_quantized;
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info,
- BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info,
+ BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
}
- ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayerKernel::validate(proposals_4_roi_values_to_use, proposals, PaddingList{ { 1, 0 } }));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayer::validate(proposals_4_roi_values_to_use, proposals, PaddingList{ { 1, 0 } }));
if(num_valid_proposals->total_size() > 0)
{
MemoryGroupResourceScope scope_mg(_memory_group);
// Compute all the anchors
- NEScheduler::get().schedule(&_compute_anchors_kernel, Window::DimY);
+ _compute_anchors.run();
// Transpose and reshape the inputs
if(!_is_nhwc)
{
- NEScheduler::get().schedule(&_permute_deltas_kernel, Window::DimY);
- NEScheduler::get().schedule(&_permute_scores_kernel, Window::DimY);
+ _permute_deltas.run();
+ _permute_scores.run();
}
_flatten_deltas.run();
if(_is_qasymm8)
{
- NEScheduler::get().schedule(&_dequantize_anchors, Window::DimY);
- NEScheduler::get().schedule(&_dequantize_deltas, Window::DimY);
+ _dequantize_anchors.run();
+ _dequantize_deltas.run();
}
// Build the boxes
- NEScheduler::get().schedule(&_bounding_box_kernel, Window::DimY);
+ _bounding_box.run();
if(_is_qasymm8)
{
- NEScheduler::get().schedule(&_quantize_all_proposals, Window::DimY);
+ _quantize_all_proposals.run();
}
// Non maxima suppression
_cpp_nms.run();
// Add dummy batch indexes
- NEScheduler::get().schedule(&_pad_kernel, Window::DimY);
+ _pad.run();
}
} // namespace arm_compute
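The hunk above shows the first recurring change in this patch: NEGenerateProposalsLayer stops owning and scheduling raw kernels (_bounding_box_kernel, _pad_kernel, the permute kernels, ...) and instead composes the corresponding runtime functions, so validation delegates to the function-level validate() and run() simply calls each member's run(). A minimal sketch of that shape, using hypothetical member/class names rather than anything taken verbatim from the patch:

// Sketch only: NEExampleProposals and its members are hypothetical, but the
// composition mirrors NEGenerateProposalsLayer::run() above.
void NEExampleProposals::run()
{
    MemoryGroupResourceScope scope_mg(_memory_group);

    _compute_anchors.run(); // was: NEScheduler::get().schedule(&_compute_anchors_kernel, Window::DimY)
    _bounding_box.run();    // was: NEScheduler::get().schedule(&_bounding_box_kernel, Window::DimY)
    _pad.run();             // was: NEScheduler::get().schedule(&_pad_kernel, Window::DimY)
}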
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHOGDescriptor::~NEHOGDescriptor() = default;
NEHOGDescriptor::NEHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space()
_memory_group.manage(&_hog_space);
// Initialise orientation binning kernel
- _orient_bin.configure(&_mag, &_phase, &_hog_space, hog->info());
+ _orient_bin = arm_compute::support::cpp14::make_unique<NEHOGOrientationBinningKernel>();
+ _orient_bin->configure(&_mag, &_phase, &_hog_space, hog->info());
// Initialize HOG norm kernel
- _block_norm.configure(&_hog_space, output, hog->info());
+ _block_norm = arm_compute::support::cpp14::make_unique<NEHOGBlockNormalizationKernel>();
+ _block_norm->configure(&_hog_space, output, hog->info());
// Allocate intermediate tensors
_mag.allocator()->allocate();
_gradient.run();
// Run orientation binning kernel
- NEScheduler::get().schedule(&_orient_bin, Window::DimY);
+ NEScheduler::get().schedule(_orient_bin.get(), Window::DimY);
// Run block normalization kernel
- NEScheduler::get().schedule(&_block_norm, Window::DimY);
+ NEScheduler::get().schedule(_block_norm.get(), Window::DimY);
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"
-#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
+#include "src/core/NEON/kernels/NEHOGDetectorKernel.h"
#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHOGDetector::~NEHOGDetector() = default;
void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class)
{
k->configure(input, hog, detection_windows, detection_window_stride, threshold, idx_class);
_kernel = std::move(k);
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
-#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHOGGradient::~NEHOGGradient() = default;
NEHOGGradient::NEHOGGradient(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
// Run magnitude/phase kernel
NEScheduler::get().schedule(_mag_phase.get(), Window::DimY);
}
+} // namespace arm_compute
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NEDerivativeKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHOGMultiDetection::~NEHOGMultiDetection() = default;
NEHOGMultiDetection::NEHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
NEScheduler::get().schedule(&_non_maxima_kernel, Window::DimY);
}
}
+} // namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
#include "arm_compute/runtime/NEON/functions/NESobel7x7.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEHarrisCornersKernel.h"
+#include "src/core/NEON/kernels/NESobel5x5Kernel.h"
+#include "src/core/NEON/kernels/NESobel7x7Kernel.h"
#include "support/MemorySupport.h"
#include <cmath>
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHarrisCorners::~NEHarrisCorners() = default;
NEHarrisCorners::NEHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
}
// Configure border filling before harris score
- _border_gx.configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value);
- _border_gy.configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value);
+ _border_gx = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_gy = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_gx->configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value);
+ _border_gy->configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value);
// Allocate once all the configure methods have been called
_gx.allocator()->allocate();
_sobel->run();
// Fill border before harris score kernel
- NEScheduler::get().schedule(&_border_gx, Window::DimZ);
- NEScheduler::get().schedule(&_border_gy, Window::DimZ);
+ NEScheduler::get().schedule(_border_gx.get(), Window::DimZ);
+ NEScheduler::get().schedule(_border_gy.get(), Window::DimZ);
// Run harris score kernel
NEScheduler::get().schedule(_harris_score.get(), Window::DimY);
// Run sort & euclidean distance
NEScheduler::get().schedule(&_sort_euclidean, Window::DimY);
}
+} // namespace arm_compute
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEHistogramKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEHistogram::~NEHistogram() = default;
NEHistogram::NEHistogram()
: _histogram_kernel(), _local_hist(), _window_lut(window_lut_default_size), _local_hist_size(0)
_local_hist.resize(_local_hist_size);
// Configure kernel
- _histogram_kernel.configure(input, output, _local_hist.data(), _window_lut.data());
+ _histogram_kernel = arm_compute::support::cpp14::make_unique<NEHistogramKernel>();
+ _histogram_kernel->configure(input, output, _local_hist.data(), _window_lut.data());
}
void NEHistogram::run()
{
// Calculate histogram of input.
- NEScheduler::get().schedule(&_histogram_kernel, Window::DimY);
+ NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
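NEHistogram above shows the second recurring change: kernel members held by value become std::unique_ptr members created with support::cpp14::make_unique inside configure(), scheduled through get() in run(), and paired with a destructor defaulted in the .cpp file, where the now forward-declared kernel type is complete. A minimal sketch of that .cpp shape, with a hypothetical NEExampleFunction/NEExampleKernel pair (names not taken from the patch):

// Sketch only: NEExampleKernel is hypothetical; its header is assumed to live
// under src/core/NEON/kernels/ after the move out of the public include tree.
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEExampleKernel.h" // hypothetical
#include "support/MemorySupport.h"

namespace arm_compute
{
NEExampleFunction::~NEExampleFunction() = default; // kernel type is complete here

void NEExampleFunction::configure(const ITensor *input, ITensor *output)
{
    // Own the kernel through a unique_ptr instead of a by-value member
    _kernel = arm_compute::support::cpp14::make_unique<NEExampleKernel>();
    _kernel->configure(input, output);
}

void NEExampleFunction::run()
{
    // Schedule through the raw pointer; ownership stays with the function
    NEScheduler::get().schedule(_kernel.get(), Window::DimY);
}
} // namespace arm_compute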
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEIm2Col::~NEIm2Col() = default;
+
NEIm2Col::NEIm2Col()
: _kernel(), _y_dim(1)
{
{
_y_dim = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
- _kernel.configure(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups);
+ _kernel = arm_compute::support::cpp14::make_unique<NEIm2ColKernel>();
+ _kernel->configure(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups);
}
Status NEIm2Col::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation,
void NEIm2Col::run()
{
- NEScheduler::get().schedule(&_kernel, _y_dim);
+ NEScheduler::get().schedule(_kernel.get(), _y_dim);
}
} // namespace arm_compute
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEInstanceNormalizationLayer::~NEInstanceNormalizationLayer() = default;
+
NEInstanceNormalizationLayer::NEInstanceNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false), _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
{
// Configure Kernels
_is_nchw = data_layout == DataLayout::NCHW;
+ _normalization_kernel = arm_compute::support::cpp14::make_unique<NEInstanceNormalizationLayerKernel>();
+
if(!_is_nchw)
{
_memory_group.manage(&_permuted_input);
_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
_permuted_input.info()->set_data_layout(DataLayout::NCHW);
- _normalization_kernel.configure(&_permuted_input, &_permuted_output, kernel_descriptor);
+ _normalization_kernel->configure(&_permuted_input, &_permuted_output, kernel_descriptor);
_permuted_output.info()->set_data_layout(DataLayout::NCHW);
_permute_output.configure(&_permuted_output, output != nullptr ? output : input, PermutationVector(2U, 0U, 1U));
}
else
{
- _normalization_kernel.configure(input, output, kernel_descriptor);
+ _normalization_kernel->configure(input, output, kernel_descriptor);
}
}
_permute_input.run();
}
- NEScheduler::get().schedule(&_normalization_kernel, Window::DimZ);
+ NEScheduler::get().schedule(_normalization_kernel.get(), Window::DimZ);
// Permute output
if(!_is_nchw)
*/
#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h"
-#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEIntegralImageKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEIntegralImage::~NEIntegralImage() = default;
void NEIntegralImage::configure(const ITensor *input, ITensor *output)
{
auto k = arm_compute::support::cpp14::make_unique<NEIntegralImageKernel>();
k->configure(input, output);
_kernel = std::move(k);
- _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Helpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
{
constexpr int max_input_tensor_dim = 3;
} // namespace
+NEL2NormalizeLayer::~NEL2NormalizeLayer() = default;
NEL2NormalizeLayer::NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
// Configure Kernels
const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim);
_reduce_func.configure(input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE);
- _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
+ _normalize_kernel = arm_compute::support::cpp14::make_unique<NEL2NormalizeLayerKernel>();
+ _normalize_kernel->configure(input, &_sumsq, output, axis, epsilon);
// Allocate intermediate tensors
_sumsq.allocator()->allocate();
MemoryGroupResourceScope scope_mg(_memory_group);
_reduce_func.run();
- NEScheduler::get().schedule(&_normalize_kernel, Window::DimY);
+ NEScheduler::get().schedule(_normalize_kernel.get(), Window::DimY);
}
} // namespace arm_compute
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/common/LSTMParams.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::utils::info_helpers;
+NELSTMLayer::~NELSTMLayer() = default;
+
NELSTMLayer::NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
_fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(),
}
// Validate copy kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&cell_state_tmp, cell_state_out));
- ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(&cell_state_tmp, cell_state_out));
+ ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output));
// Validate scratch concatenation
std::vector<const ITensorInfo *> inputs_vector_info_raw;
}
_fully_connected_cell_state.run();
- NEScheduler::get().schedule(&_transpose_cell_state, Window::DimY);
+ _transpose_cell_state.run();
_gemm_cell_state1.run();
_accum_cell_state1.run();
if(_is_layer_norm_lstm)
}
}
- NEScheduler::get().schedule(&_copy_cell_state, Window::DimY);
- NEScheduler::get().schedule(&_copy_output, Window::DimY);
+ _copy_cell_state.run();
+ _copy_output.run();
_concat_scratch_buffer.run();
}
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include <cmath>
const QuantizationInfo qsymm_4(16.f / 32768.f, 0); // qsymm16 with 4 integer bit
const QuantizationInfo qsymm_0(1.f / 32768.f, 0); // qsymm16 with 0 integer bit
} // namespace
+NELSTMLayerQuantized::~NELSTMLayerQuantized() = default;
NELSTMLayerQuantized::NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(),
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NELaplacianPyramid::~NELaplacianPyramid() = default;
NELaplacianPyramid::NELaplacianPyramid() // NOLINT
: _num_levels(0),
_gauss_pyr.allocate();
_conv_pyr.allocate();
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h"
+#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/IPyramid.h"
#include "arm_compute/core/ITensor.h"
#include <cstddef>
-using namespace arm_compute;
+namespace arm_compute
+{
+NELaplacianReconstruct::~NELaplacianReconstruct() = default;
NELaplacianReconstruct::NELaplacianReconstruct() // NOLINT
: _tmp_pyr(),
_depthf.run();
}
+} // namespace arm_compute
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEIm2ColKernel.h"
+#include "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "support/MemorySupport.h"
#include <cmath>
#include <tuple>
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
shape_gemm.set(1, mat_input_rows);
}
} // namespace
+NELocallyConnectedLayer::~NELocallyConnectedLayer() = default;
NELocallyConnectedLayer::NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
+ : _memory_group(std::move(memory_manager)), _input_im2col(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
_is_prepared(false), _original_weights(nullptr)
{
}
TensorInfo input_im2col_reshaped_info(shape_im2col, 1, input->data_type());
TensorInfo gemm_output_info(shape_gemm, 1, input->data_type());
- ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEIm2Col::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias));
ARM_COMPUTE_RETURN_ON_ERROR(NEWeightsReshapeKernel::validate(weights, biases, &weights_reshaped_info));
ARM_COMPUTE_RETURN_ON_ERROR(NELocallyConnectedMatrixMultiplyKernel::validate(&input_im2col_reshaped_info, &weights_reshaped_info, &gemm_output_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h)));
+ ARM_COMPUTE_RETURN_ON_ERROR(NECol2Im::validate(&gemm_output_info, output, Size2D(conv_w, conv_h)));
return Status{};
}
_memory_group.manage(&_gemm_output);
// Configure kernels
- _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
- _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);
- _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
- _output_col2im_kernel.configure(&_gemm_output, output, Size2D(conv_w, conv_h));
+ _input_im2col.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
+ _weights_reshape_kernel = arm_compute::support::cpp14::make_unique<NEWeightsReshapeKernel>();
+ _weights_reshape_kernel->configure(weights, biases, &_weights_reshaped);
+ _mm_kernel = arm_compute::support::cpp14::make_unique<NELocallyConnectedMatrixMultiplyKernel>();
+ _mm_kernel->configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
+ _output_col2im.configure(&_gemm_output, output, Size2D(conv_w, conv_h));
// Allocate intermediate tensors
_input_im2col_reshaped.allocator()->allocate();
MemoryGroupResourceScope scope_mg(_memory_group);
// Run input reshaping
- NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY);
+ _input_im2col.run();
// Runs GEMM on reshaped matrices
- NEScheduler::get().schedule(&_mm_kernel, Window::DimX);
+ NEScheduler::get().schedule(_mm_kernel.get(), Window::DimX);
// Reshape output matrix
- NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY);
+ _output_col2im.run();
}
void NELocallyConnectedLayer::prepare()
// Run weights reshaping and mark original weights tensor as unused
_weights_reshaped.allocator()->allocate();
- NEScheduler::get().schedule(&_weights_reshape_kernel, 3);
+ NEScheduler::get().schedule(_weights_reshape_kernel.get(), 3);
_original_weights->mark_as_unused();
_is_prepared = true;
}
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEMagnitude.h"
-#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+NEMagnitude::~NEMagnitude() = default;
void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type)
{
_kernel = std::move(k);
}
}
+} // namespace arm_compute
#include "arm_compute/core/ITensor.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
+#include "src/core/NEON/kernels/NEMemsetKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEMaxUnpoolingLayer::~NEMaxUnpoolingLayer() = default;
+
NEMaxUnpoolingLayer::NEMaxUnpoolingLayer()
: _memset_kernel(), _unpooling_layer_kernel()
void NEMaxUnpoolingLayer::configure(ITensor *input, ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info)
{
const PixelValue zero_value(0.f);
- _memset_kernel.configure(output, zero_value);
- _unpooling_layer_kernel.configure(input, indices, output, pool_info);
+ _memset_kernel = arm_compute::support::cpp14::make_unique<NEMemsetKernel>();
+ _unpooling_layer_kernel = arm_compute::support::cpp14::make_unique<NEMaxUnpoolingLayerKernel>();
+ _memset_kernel->configure(output, zero_value);
+ _unpooling_layer_kernel->configure(input, indices, output, pool_info);
}
Status NEMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
void NEMaxUnpoolingLayer::run()
{
- NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
- NEScheduler::get().schedule(&_unpooling_layer_kernel, Window::DimY);
+ NEScheduler::get().schedule(_memset_kernel.get(), Window::DimY);
+ NEScheduler::get().schedule(_unpooling_layer_kernel.get(), Window::DimY);
}
} /* namespace arm_compute */
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEMeanStdDevKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEMeanStdDev::~NEMeanStdDev() = default;
NEMeanStdDev::NEMeanStdDev()
: _mean_stddev_kernel(), _fill_border_kernel(), _global_sum(0), _global_sum_squared(0)
void NEMeanStdDev::configure(IImage *input, float *mean, float *stddev)
{
- _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared);
- _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
+ _mean_stddev_kernel = arm_compute::support::cpp14::make_unique<NEMeanStdDevKernel>();
+ _fill_border_kernel = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+
+ _mean_stddev_kernel->configure(input, mean, &_global_sum, stddev, &_global_sum_squared);
+ _fill_border_kernel->configure(input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
}
void NEMeanStdDev::run()
_global_sum = 0;
_global_sum_squared = 0;
- NEScheduler::get().schedule(&_fill_border_kernel, Window::DimZ);
- NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY);
+ NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimZ);
+ NEScheduler::get().schedule(_mean_stddev_kernel.get(), Window::DimY);
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
-#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
+#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
+NEMeanStdDevNormalizationLayer::~NEMeanStdDevNormalizationLayer() = default;
+
void NEMeanStdDevNormalizationLayer::configure(ITensor *input, ITensor *output, float epsilon)
{
auto k = arm_compute::support::cpp14::make_unique<NEMeanStdDevNormalizationKernel>();
*/
#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h"
-#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEMedian3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEMedian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NEMedian3x3Kernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEMinMaxLocation::~NEMinMaxLocation() = default;
NEMinMaxLocation::NEMinMaxLocation()
: _min_max(), _min_max_loc()
void NEMinMaxLocation::configure(const IImage *input, void *min, void *max, ICoordinates2DArray *min_loc, ICoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count)
{
- _min_max.configure(input, min, max);
- _min_max_loc.configure(input, min, max, min_loc, max_loc, min_count, max_count);
+ _min_max = arm_compute::support::cpp14::make_unique<NEMinMaxKernel>();
+ _min_max->configure(input, min, max);
+
+ _min_max_loc = arm_compute::support::cpp14::make_unique<NEMinMaxLocationKernel>();
+ _min_max_loc->configure(input, min, max, min_loc, max_loc, min_count, max_count);
}
void NEMinMaxLocation::run()
{
- _min_max.reset();
+ _min_max->reset();
/* Run min max kernel */
- NEScheduler::get().schedule(&_min_max, Window::DimY);
+ NEScheduler::get().schedule(_min_max.get(), Window::DimY);
/* Run min max location */
- NEScheduler::get().schedule(&_min_max_loc, Window::DimY);
+ NEScheduler::get().schedule(_min_max_loc.get(), Window::DimY);
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h"
-#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NENonLinearFilterKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
BorderMode border_mode,
uint8_t constant_border_value)
auto k = arm_compute::support::cpp14::make_unique<NENonLinearFilterKernel>();
k->configure(input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
-#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NENonMaximaSuppression3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode)
{
auto k = arm_compute::support::cpp14::make_unique<NENonMaximaSuppression3x3Kernel>();
k->configure(input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
if(border_mode != BorderMode::UNDEFINED)
{
- _border_handler.configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast<float>(0.f));
+ b->configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast<float>(0.f));
}
else
{
- _border_handler.configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast<float>(0.f));
+ b->configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast<float>(0.f));
}
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NENormalizationLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NENormalizationLayer::~NENormalizationLayer() = default;
+
NENormalizationLayer::NENormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _norm_kernel(), _multiply_f(), _input_squared()
{
_memory_group.manage(&_input_squared);
// Configure kernels
- _norm_kernel.configure(input, &_input_squared, output, norm_info);
+ _norm_kernel = arm_compute::support::cpp14::make_unique<NENormalizationLayerKernel>();
+ _norm_kernel->configure(input, &_input_squared, output, norm_info);
_multiply_f.configure(input, input, &_input_squared, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
// Allocate the tensor once the configure methods have been called
{
MemoryGroupResourceScope scope_mg(_memory_group);
_multiply_f.run();
- NEScheduler::get().schedule(&_norm_kernel, Window::DimY);
+ NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY);
}
}
\ No newline at end of file
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Pyramid.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NELKTrackerKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEOpticalFlow::~NEOpticalFlow() = default;
NEOpticalFlow::NEOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_func_scharr[i].configure(old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value);
// Init Lucas-Kanade kernel
- _kernel_tracker[i].configure(old_ith_input, new_ith_input, &_scharr_gx[i], &_scharr_gy[i],
- old_points, new_points_estimates, new_points,
- &_old_points_internal, &_new_points_internal,
- termination, use_initial_estimate, epsilon, num_iterations, window_dimension,
- i, _num_levels, pyr_scale);
+ _kernel_tracker[i] = arm_compute::support::cpp14::make_unique<NELKTrackerKernel>();
+ _kernel_tracker[i]->configure(old_ith_input, new_ith_input, &_scharr_gx[i], &_scharr_gy[i],
+ old_points, new_points_estimates, new_points,
+ &_old_points_internal, &_new_points_internal,
+ termination, use_initial_estimate, epsilon, num_iterations, window_dimension,
+ i, _num_levels, pyr_scale);
_scharr_gx[i].allocator()->allocate();
_scharr_gy[i].allocator()->allocate();
_func_scharr[level - 1].run();
// Run Lucas-Kanade kernel
- NEScheduler::get().schedule(&_kernel_tracker[level - 1], Window::DimX);
+ NEScheduler::get().schedule(_kernel_tracker[level - 1].get(), Window::DimX);
}
}
+} // namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
+#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEPadLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
}
} // namespace
+NEPadLayer::~NEPadLayer() = default;
+
NEPadLayer::NEPadLayer()
: _copy_kernel(), _pad_kernel(), _mode(), _padding(), _num_dimensions(0), _slice_functions(), _concat_functions(), _slice_results(), _concat_results()
{
void NEPadLayer::configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value)
{
- _pad_kernel.configure(input, output, padding, constant_value, PaddingMode::CONSTANT);
+ _pad_kernel = arm_compute::support::cpp14::make_unique<NEPadLayerKernel>();
+ _pad_kernel->configure(input, output, padding, constant_value, PaddingMode::CONSTANT);
}
void NEPadLayer::configure_reflect_symmetric_mode(ITensor *input, ITensor *output)
else
{
// Copy the input to the whole output if no padding is applied
- _copy_kernel.configure(input, output);
+ _copy_kernel = arm_compute::support::cpp14::make_unique<NECopyKernel>();
+ _copy_kernel->configure(input, output);
}
}
{
case PaddingMode::CONSTANT:
{
- NEScheduler::get().schedule(&_pad_kernel, Window::DimZ);
+ NEScheduler::get().schedule(_pad_kernel.get(), Window::DimZ);
break;
}
case PaddingMode::REFLECT:
}
else
{
- NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
+ NEScheduler::get().schedule(_copy_kernel.get(), Window::DimY);
}
}
} // namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
-#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
+#include "src/core/NEON/kernels/NEPermuteKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEPhase.h"
-#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
+#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type)
{
if(phase_type == PhaseType::UNSIGNED)
_kernel = std::move(k);
}
}
+} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
+#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/core/ITensor.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEPoolingLayerKernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NEPoolingLayer::~NEPoolingLayer() = default;
NEPoolingLayer::NEPoolingLayer()
: _pooling_layer_kernel(), _border_handler(), _is_global_pooling_layer(false), _data_layout(DataLayout::NCHW)
_data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->info()->data_layout() : pool_info.data_layout;
// Configure pooling kernel
- _pooling_layer_kernel.configure(input, output, pool_info, indices);
+ _pooling_layer_kernel = arm_compute::support::cpp14::make_unique<NEPoolingLayerKernel>();
+ _pooling_layer_kernel->configure(input, output, pool_info, indices);
switch(_data_layout)
{
{
zero_value = PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
}
- _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, zero_value);
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _border_handler->configure(input, _pooling_layer_kernel->border_size(), border_mode, zero_value);
break;
}
case DataLayout::NHWC:
{
case DataLayout::NCHW:
// Fill border
- NEScheduler::get().schedule(&_border_handler, Window::DimY);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimY);
// Run pooling layer
- NEScheduler::get().schedule(&_pooling_layer_kernel, _is_global_pooling_layer ? Window::DimZ : Window::DimY);
+ NEScheduler::get().schedule(_pooling_layer_kernel.get(), _is_global_pooling_layer ? Window::DimZ : Window::DimY);
break;
case DataLayout::NHWC:
// Run pooling layer
- NEScheduler::get().schedule(&_pooling_layer_kernel, Window::DimX);
+ NEScheduler::get().schedule(_pooling_layer_kernel.get(), Window::DimX);
break;
default:
ARM_COMPUTE_ERROR("Data layout not supported");
}
}
+
+} // namespace arm_compute
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h"
#include "support/MemorySupport.h"
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
}
} // namespace
+Status NEQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
+{
+ // Output quantization scale will be different, but ignored here
+ // since it will be configured at configure() stage.
+ const TensorInfo out
+ {
+ in
+ };
+ return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
+}
+
+void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g, const ITensor *in)
+{
+ ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
+
+ Tensor &out = get_layer_norm_output(g);
+ _memory_group.manage(&out);
+ out.allocator()->init(*(in->info()));
+
+ get_layer_norm(g) = arm_compute::support::cpp14::make_unique<NEQLSTMLayerNormalizationKernel>();
+ get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
+}
+
+NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() = default;
+
Status NEQLSTMLayer::TensorCopyKernel::validate(const ITensorInfo &src, const ITensorInfo &dst)
{
ARM_COMPUTE_RETURN_ERROR_ON(src.tensor_shape().num_dimensions() > max_dimension_supported);
input_iter, output_iter);
}
+NEQLSTMLayer::~NEQLSTMLayer() = default;
+
NEQLSTMLayer::NEQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(), _transpose_input_to_forget_weights(), _transpose_input_to_cell_weights(), _transpose_input_to_output_weights(), _transpose_input_to_input_weights(),
+ _transpose_recurrent_to_forget_weights(), _transpose_recurrent_to_cell_weights(), _transpose_recurrent_to_output_weights(), _transpose_recurrent_to_input_weights(), _transpose_projection_weights(),
+ _input_to_input_reduction(), _recurrent_to_input_reduction(), _input_to_forget_reduction(), _recurrent_to_forget_reduction(), _input_to_cell_reduction(), _recurrent_to_cell_reduction(),
+ _input_to_output_reduction(), _recurrent_to_output_reduction(), _projection_reduction(), _projection_bias_add(), _mm_input_to_forget(), _mm_recurrent_to_forget(), _pixelwise_mul_cell_to_forget(),
+ _input_to_forget_outstage(), _recurrent_to_forget_outstage(), _cell_to_forget_outstage(), _accumulate_input_recurrent_forget(), _accumulate_cell_forget(), _forget_gate_sigmoid(), _mm_input_to_cell(),
+ _input_to_cell_outstage(), _mm_recurrent_to_cell(), _recurrent_to_cell_outstage(), _accumulate_input_recurrent_modulation(), _cell_gate_tanh(), _input_gate_sub(), _mm_input_to_input(),
+ _input_to_input_outstage(), _mm_recurrent_to_input(), _recurrent_to_input_outstage(), _accumulate_input_recurrent_input(), _pixelwise_mul_cell_to_input(), _cell_to_input_outstage(),
+ _accumulate_cell_input(), _input_gate_sigmoid(), _pixelwise_mul_forget_cell(), _pixelwise_mul_input_cell(), _add_forget_cell(), _cell_clip(), _mm_input_to_output(), _input_to_output_outstage(),
+ _mm_recurrent_to_output(), _recurrent_to_output_outstage(), _accumulate_input_recurrent_output(), _pixelwise_mul_cell_to_output(), _cell_to_output_outstage(), _accumulate_cell_to_output(),
+ _output_gate_sigmoid(), _hidden_tanh(), _pixelwise_mul_hidden(), _hidden_outstage(), _mm_projection(), _projection_outstage(), _accumulate_projection(), _projection_clip(), _projection_bias_copy(),
+ _projection_output_to_accumulate_copy(), _projection_accumulate_to_output_copy(), _hidden_to_output_copy(), _layer_norms(), _copy_output(), _layer_norm_weights(), _layer_norm_bias(),
+ _layer_norm_output()
{
_memory_group = MemoryGroup(std::move(memory_manager));
}
_input_to_input_weights = lstm_params.input_to_input_weights();
_recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();
- _input_to_input_reduction.configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_input_reduction.configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_input_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_input_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _input_to_input_reduction->configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_input_reduction->configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
}
- _input_to_forget_reduction.configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_forget_reduction.configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_cell_reduction.configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_cell_reduction.configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_output_reduction.configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_output_reduction.configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+
+ _input_to_forget_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_forget_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _input_to_cell_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_cell_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _input_to_output_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _recurrent_to_output_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+
+    _input_to_forget_reduction->configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_forget_reduction->configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_cell_reduction->configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_cell_reduction->configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_output_reduction->configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_output_reduction->configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
if(_has_projection)
{
- _projection_reduction.configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
+ _projection_reduction = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixAReductionKernel>();
+ _projection_reduction->configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
if(_projection_bias != nullptr)
{
_projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output_state_in, output_state_out);
}
- ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output));
return Status{};
}
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Forget), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Forget).get(), Window::DimY);
}
_forget_gate_sigmoid.run();
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Cell), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Cell).get(), Window::DimY);
}
_cell_gate_tanh.run();
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Input), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Input).get(), Window::DimY);
}
_input_gate_sigmoid.run();
if(_has_layer_norm)
{
- NEScheduler::get().schedule(&get_layer_norm(LayerNormGate::Output), Window::DimY);
+ NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Output).get(), Window::DimY);
}
_output_gate_sigmoid.run();
}
// Copy output_state_out to output
- NEScheduler::get().schedule(&_copy_output, Window::DimY);
+ _copy_output.run();
}
void NEQLSTMLayer::prepare()
{
_input_to_input_eff_bias.allocator()->allocate();
_recurrent_to_input_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_input_to_input_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_input_reduction, Window::DimY);
+ NEScheduler::get().schedule(_input_to_input_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_input_reduction.get(), Window::DimY);
_input_to_input_weights_transposed.allocator()->allocate();
_recurrent_to_input_weights_transposed.allocator()->allocate();
_recurrent_to_cell_eff_bias.allocator()->allocate();
_input_to_output_eff_bias.allocator()->allocate();
_recurrent_to_output_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_input_to_forget_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_forget_reduction, Window::DimY);
- NEScheduler::get().schedule(&_input_to_cell_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_cell_reduction, Window::DimY);
- NEScheduler::get().schedule(&_input_to_output_reduction, Window::DimY);
- NEScheduler::get().schedule(&_recurrent_to_output_reduction, Window::DimY);
+ NEScheduler::get().schedule(_input_to_forget_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_forget_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_input_to_cell_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_cell_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_input_to_output_reduction.get(), Window::DimY);
+ NEScheduler::get().schedule(_recurrent_to_output_reduction.get(), Window::DimY);
if(_has_projection)
{
_projection_eff_bias.allocator()->allocate();
- NEScheduler::get().schedule(&_projection_reduction, Window::DimY);
+ NEScheduler::get().schedule(_projection_reduction.get(), Window::DimY);
if(_projection_bias != nullptr)
{
_projection_bias_add.run();
_is_prepared = true;
}
}
-
} // namespace arm_compute
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEQuantizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
+#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
+#include "src/core/NEON/kernels/NECopyKernel.h"
+#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NERNNLayer::~NERNNLayer() = default;
+
NERNNLayer::NERNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_f(), _activation(), _fully_connected(memory_manager), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
_is_prepared(false)
_activation.configure(&_add_output, hidden_state, info);
_add_output.allocator()->allocate();
- _copy_kernel.configure(hidden_state, output);
+ _copy_kernel = arm_compute::support::cpp14::make_unique<NECopyKernel>();
+ _copy_kernel->configure(hidden_state, output);
}
void NERNNLayer::run()
_activation.run();
// copy hidden out to output
- NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
+ NEScheduler::get().schedule(_copy_kernel.get(), Window::DimY);
}
void NERNNLayer::prepare()
*/
#include "arm_compute/runtime/NEON/functions/NEROIAlignLayer.h"
-#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEROIAlignLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEROIPoolingLayer::~NEROIPoolingLayer() = default;
+
NEROIPoolingLayer::NEROIPoolingLayer()
: _roi_kernel()
{
void NEROIPoolingLayer::configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info)
{
- _roi_kernel.configure(input, rois, output, pool_info);
+ _roi_kernel = arm_compute::support::cpp14::make_unique<NEROIPoolingLayerKernel>();
+ _roi_kernel->configure(input, rois, output, pool_info);
}
void NEROIPoolingLayer::run()
{
- NEScheduler::get().schedule(&_roi_kernel, Window::DimX);
+ NEScheduler::get().schedule(_roi_kernel.get(), Window::DimX);
}
} // namespace arm_compute
\ No newline at end of file
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/NEON/functions/NERange.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NERangeKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NERange::~NERange() = default;
+
NERange::NERange()
: _kernel()
{
void NERange::configure(ITensor *output, const float start, const float end, const float step)
{
- _kernel.configure(output, start, end, step);
+ _kernel = arm_compute::support::cpp14::make_unique<NERangeKernel>();
+ _kernel->configure(output, start, end, step);
}
Status NERange::validate(const ITensorInfo *output, const float start, const float end, const float step)
void NERange::run()
{
- NEScheduler::get().schedule(&_kernel, Window::DimX);
+ NEScheduler::get().schedule(_kernel.get(), Window::DimX);
}
} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
}
} // namespace
+NEReduceMean::~NEReduceMean() = default;
+
NEReduceMean::NEReduceMean(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), _dequant(), _requant(), _reduction_ops(), _keep_dims(), _do_requant(), _input_no_quant(),
_output_no_quant()
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
}
} // namespace
+NEReductionOperation::~NEReductionOperation() = default;
+
NEReductionOperation::NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager), _reduction_kernel(), _reshape(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false)
{
ARM_COMPUTE_ERROR_THROW_ON(NEReductionOperation::validate(input->info(), output->info(), axis, op, keep_dims));
// Configure reduction kernel
- _reduction_kernel.configure(input, output_internal, axis, op);
+ _reduction_kernel = arm_compute::support::cpp14::make_unique<NEReductionOperationKernel>();
+ _reduction_kernel->configure(input, output_internal, axis, op);
_window_split = reduction_window_split_dimension(axis);
_reduction_axis = axis;
void NEReductionOperation::run()
{
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_reduction_kernel, _window_split);
+ NEScheduler::get().schedule(_reduction_kernel.get(), _window_split);
if(_is_reshape_required)
{
_reshape.run();
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NERemapKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NERemapKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported");
auto k = arm_compute::support::cpp14::make_unique<NERemapKernel>();
-
k->configure(input, map_x, map_y, output, policy);
-
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
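For simple functions such as NERemap, only the internal border handling changes (the NEFillBorderKernel is now heap-allocated and moved into _border_handler); the public interface is untouched. A hedged usage sketch, with the configure() signature taken from the hunk above and the tensors assumed to be initialised and allocated elsewhere:

#include "arm_compute/runtime/NEON/functions/NERemap.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void remap_example(Tensor &src, Tensor &map_x, Tensor &map_y, Tensor &dst)
{
    NERemap remap;
    remap.configure(&src, &map_x, &map_y, &dst,
                    InterpolationPolicy::NEAREST_NEIGHBOR, // AREA is rejected by the assert above
                    BorderMode::CONSTANT,
                    0 /* constant_border_value */);
    remap.run(); // fills the border first, then runs NERemapKernel
}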
*/
#include "arm_compute/runtime/NEON/functions/NEReorgLayer.h"
-#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h"
+#include "src/core/NEON/kernels/NEReorgLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Types.h"
+#include "src/core/NEON/kernels/NEReshapeLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
{
namespace experimental
{
+NEReshape::~NEReshape() = default;
+
void NEReshape::configure(const ITensorInfo *input, ITensorInfo *output)
{
auto k = arm_compute::support::cpp14::make_unique<NEReshapeLayerKernel>();
*/
#include "arm_compute/runtime/NEON/functions/NEReverse.h"
-#include "arm_compute/core/NEON/kernels/NEReverseKernel.h"
+#include "src/core/NEON/kernels/NEReverseKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEScaleKernel.h"
#include "src/core/utils/ScaleUtils.h"
*/
#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h"
-#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEScharr3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<NEScharr3x3Kernel>();
k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
*/
#include "arm_compute/runtime/NEON/functions/NESelect.h"
-#include "arm_compute/core/NEON/kernels/NESelectKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NESelectKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NESlice.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "src/core/NEON/kernels/NEStridedSliceKernel.h"
#include "support/MemorySupport.h"
*/
#include "arm_compute/runtime/NEON/functions/NESobel3x3.h"
-#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESobel3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NESobel3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
{
auto k = arm_compute::support::cpp14::make_unique<NESobel3x3Kernel>();
k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
\ No newline at end of file
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESobel5x5Kernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NESobel5x5::~NESobel5x5() = default;
NESobel5x5::NESobel5x5(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler()
TensorInfo tensor_info(input->info()->tensor_shape(), Format::S16);
+ _sobel_hor = arm_compute::support::cpp14::make_unique<NESobel5x5HorKernel>();
+ _sobel_vert = arm_compute::support::cpp14::make_unique<NESobel5x5VertKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+
if(run_sobel_x && run_sobel_y)
{
_tmp_x.allocator()->init(tensor_info);
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
_tmp_y.allocator()->allocate();
}
{
_tmp_x.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
- _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
}
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
}
- _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void NESobel5x5::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_sobel_hor, Window::DimY);
- NEScheduler::get().schedule(&_sobel_vert, Window::DimY);
+ NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY);
+ NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY);
}
+} // namespace arm_compute
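NESobel5x5 (and NESobel7x7 below) keep the same run() sequence after the change: the border-fill kernel is scheduled on DimZ, then the horizontal and vertical passes on DimY inside the memory-group scope; only the kernel storage moves behind unique_ptr. A hedged usage sketch, assuming a U8 input and S16 gradient tensors that are already initialised and allocated:

#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void sobel5x5_example(Tensor &src, Tensor &grad_x, Tensor &grad_y)
{
    // a std::shared_ptr<IMemoryManager> can be passed to the constructor so the
    // _tmp_x/_tmp_y intermediates share memory with other functions
    NESobel5x5 sobel;
    sobel.configure(&src, &grad_x, &grad_y, BorderMode::UNDEFINED, 0);
    sobel.run();
}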
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESobel7x7Kernel.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+NESobel7x7::~NESobel7x7() = default;
NESobel7x7::NESobel7x7(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler()
const bool run_sobel_y = output_y != nullptr;
TensorInfo tensor_info(input->info()->tensor_shape(), Format::S32);
+ _sobel_hor = arm_compute::support::cpp14::make_unique<NESobel7x7HorKernel>();
+ _sobel_vert = arm_compute::support::cpp14::make_unique<NESobel7x7VertKernel>();
+ _border_handler = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
if(run_sobel_x && run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
_tmp_y.allocator()->allocate();
}
{
_tmp_x.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
- _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
}
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
}
- _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void NESobel7x7::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
MemoryGroupResourceScope scope_mg(_memory_group);
- NEScheduler::get().schedule(&_sobel_hor, Window::DimY);
- NEScheduler::get().schedule(&_sobel_vert, Window::DimY);
+ NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY);
+ NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY);
}
+} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h"
+#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h"
#include "src/core/helpers/SoftmaxHelpers.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+template <bool IS_LOG>
+NESoftmaxLayerGeneric<IS_LOG>::~NESoftmaxLayerGeneric() = default;
+
template <bool IS_LOG>
NESoftmaxLayerGeneric<IS_LOG>::NESoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _permute_input(), _permute_output(), _max_kernel(), _softmax_kernel(), _fill_border_kernel(), _max(), _tmp(), _input_permuted(), _output_permuted(),
_memory_group.manage(&_max);
_memory_group.manage(&_tmp);
- // Configure Kernels
- _max_kernel.configure(tmp_input, &_max);
+ // Configure kernels
+ _max_kernel = arm_compute::support::cpp14::make_unique<NELogits1DMaxKernel>();
+ _softmax_kernel = arm_compute::support::cpp14::make_unique<NELogits1DSoftmaxKernel<IS_LOG>>();
+ _max_kernel->configure(tmp_input, &_max);
if(_needs_permute)
{
// Add to the memory manager _output_permuted
_memory_group.manage(&_output_permuted);
// The normalization kernel stores the result in a permuted output tensor
- _softmax_kernel.configure(tmp_input, &_max, &_output_permuted, beta, &_tmp);
+ _softmax_kernel->configure(tmp_input, &_max, &_output_permuted, beta, &_tmp);
_input_permuted.allocator()->allocate();
// Re-permute the permuted output into the requested (4D) output
else
{
// Softmax 2D case
- _fill_border_kernel.configure(tmp_input, _max_kernel.border_size(), BorderMode::REPLICATE);
- _softmax_kernel.configure(tmp_input, &_max, output, beta, &_tmp);
+ _fill_border_kernel = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ _fill_border_kernel->configure(tmp_input, _max_kernel->border_size(), BorderMode::REPLICATE);
+ _softmax_kernel->configure(tmp_input, &_max, output, beta, &_tmp);
}
// Allocate intermediate buffers
{
_permute_input.run();
}
+ else
+ {
+ NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimY);
+ }
- NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY);
- NEScheduler::get().schedule(&_max_kernel, Window::DimY);
- NEScheduler::get().schedule(&_softmax_kernel, Window::DimY);
+ NEScheduler::get().schedule(_max_kernel.get(), Window::DimY);
+ NEScheduler::get().schedule(_softmax_kernel.get(), Window::DimY);
if(_needs_permute)
{
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEMemsetKernel.h"
+#include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NESpaceToBatchLayer::~NESpaceToBatchLayer() = default;
+
NESpaceToBatchLayer::NESpaceToBatchLayer()
: _space_to_batch_kernel(), _memset_kernel(), _has_padding(false)
{
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
- _has_padding = true;
- _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+ _has_padding = true;
+ _memset_kernel = arm_compute::support::cpp14::make_unique<NEMemsetKernel>();
+ _memset_kernel->configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
}
- _space_to_batch_kernel.configure(input, block_shape, paddings, output);
+ _space_to_batch_kernel = arm_compute::support::cpp14::make_unique<NESpaceToBatchLayerKernel>();
+ _space_to_batch_kernel->configure(input, block_shape, paddings, output);
}
void NESpaceToBatchLayer::configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output)
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
- _has_padding = true;
- _memset_kernel.configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+ _has_padding = true;
+ _memset_kernel = arm_compute::support::cpp14::make_unique<NEMemsetKernel>();
+ _memset_kernel->configure(output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
}
- _space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output);
+ _space_to_batch_kernel = arm_compute::support::cpp14::make_unique<NESpaceToBatchLayerKernel>();
+ _space_to_batch_kernel->configure(input, block_shape_x, block_shape_y, padding_left, padding_right, output);
}
Status NESpaceToBatchLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output)
// Zero out output only if we have paddings
if(_has_padding)
{
- NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
+ NEScheduler::get().schedule(_memset_kernel.get(), Window::DimY);
}
- NEScheduler::get().schedule(&_space_to_batch_kernel, Window::DimY);
+ NEScheduler::get().schedule(_space_to_batch_kernel.get(), Window::DimY);
}
} // namespace arm_compute
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NESpaceToDepthLayer::~NESpaceToDepthLayer() = default;
+
NESpaceToDepthLayer::NESpaceToDepthLayer()
: _space_to_depth_kernel()
{
void NESpaceToDepthLayer::configure(const ITensor *input, ITensor *output, int32_t block_shape)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- _space_to_depth_kernel.configure(input, output, block_shape);
+ _space_to_depth_kernel = arm_compute::support::cpp14::make_unique<NESpaceToDepthLayerKernel>();
+ _space_to_depth_kernel->configure(input, output, block_shape);
}
Status NESpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
void NESpaceToDepthLayer::run()
{
- NEScheduler::get().schedule(&_space_to_depth_kernel, Window::DimY);
+ NEScheduler::get().schedule(_space_to_depth_kernel.get(), Window::DimY);
}
} // namespace arm_compute
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEStackLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEStackLayer::~NEStackLayer() = default;
+
NEStackLayer::NEStackLayer() // NOLINT
: _input(),
_stack_kernels(),
for(unsigned int i = 0; i < _num_inputs; i++)
{
- _stack_kernels[i].configure(input[i], axis_u, i, _num_inputs, output);
+ _stack_kernels[i] = arm_compute::support::cpp14::make_unique<NEStackLayerKernel>();
+ _stack_kernels[i]->configure(input[i], axis_u, i, _num_inputs, output);
}
}
{
for(unsigned i = 0; i < _num_inputs; i++)
{
- NEScheduler::get().schedule(&_stack_kernels[i], Window::DimY);
+ NEScheduler::get().schedule(_stack_kernels[i].get(), Window::DimY);
}
}
} // namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/NEON/kernels/NEStridedSliceKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NETableLookup.h"
-#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
+#include "src/core/NEON/kernels/NETableLookupKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NEThreshold.h"
-#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h"
+#include "src/core/NEON/kernels/NEThresholdKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/NEON/functions/NETile.h"
-#include "arm_compute/core/NEON/kernels/NETileKernel.h"
+#include "src/core/NEON/kernels/NETileKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
#include "support/MemorySupport.h"
#include <utility>
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
*/
#include "arm_compute/runtime/NEON/functions/NEUpsampleLayer.h"
-#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h"
+#include "src/core/NEON/kernels/NEUpsampleLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
+NEUpsampleLayer::~NEUpsampleLayer() = default;
+
NEUpsampleLayer::NEUpsampleLayer()
: _kernel(), _data_layout()
{
void NEUpsampleLayer::configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy &policy)
{
_data_layout = input->info()->data_layout();
- _kernel.configure(input, output, info, policy);
+ _kernel = arm_compute::support::cpp14::make_unique<NEUpsampleLayerKernel>();
+ _kernel->configure(input, output, info, policy);
}
void NEUpsampleLayer::run()
{
const auto win = (_data_layout == DataLayout::NCHW) ? Window::DimZ : Window::DimX;
- NEScheduler::get().schedule(&_kernel, win);
+ NEScheduler::get().schedule(_kernel.get(), win);
}
} // namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEWarpKernel.h"
#include "support/MemorySupport.h"
#include <utility>
ARM_COMPUTE_ERROR("Interpolation type not supported");
}
- _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value);
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, constant_border_value);
+ _border_handler = std::move(b);
}
#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEWarpKernel.h"
#include "support/MemorySupport.h"
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void NEWarpPerspective::configure(ITensor *input, ITensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR("Interpolation type not supported");
}
- _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value);
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(input, _kernel->border_size(), border_mode, constant_border_value);
+ _border_handler = std::move(b);
}
+} // namespace arm_compute
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h"
#include "support/MemorySupport.h"
*/
#include "arm_compute/runtime/NEON/functions/NEYOLOLayer.h"
-#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
+#include "src/core/NEON/kernels/NEYOLOLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include <stdio.h>
#include <vector>
-#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp"
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/Pyramid.h"
#include "arm_compute/runtime/common/LSTMParams.h"
+#include "src/core/NEON/kernels/assembly/arm_gemm.hpp"
#include "utils/TypePrinter.h"
namespace arm_compute
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "support/MemorySupport.h"
#include "tests/Globals.h"
-/** This template synthetizes an INESimpleFunction which runs the given kernel K */
+/** This template synthesizes an INESimpleFunctionNoBorder which runs the given kernel K */
template <typename K>
-class NESynthetizeFunction : public INESimpleFunction
+class NESynthetizeFunction : public INESimpleFunctionNoBorder
{
public:
/** Configure the kernel.
auto k = arm_compute::support::cpp14::make_unique<K>();
k->configure(first, std::forward<Args>(args)...);
_kernel = std::move(k);
- _border_handler.configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue());
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue());
+ _border_handler = std::move(b);
}
};
auto k = arm_compute::support::cpp14::make_unique<K>();
k->configure(first, std::forward<Args>(args)...);
_kernel = std::move(k);
- _border_handler.configure(first, BorderSize(_kernel->border_size()), BorderMode::CONSTANT, PixelValue());
+
+ auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+ b->configure(first, BorderSize(_kernel->border_size()), BorderMode::CONSTANT, PixelValue());
+ _border_handler = std::move(b);
}
};
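The NESynthetizeFunction helper now derives from INESimpleFunctionNoBorder, and the border-handling variants allocate their NEFillBorderKernel explicitly; how the kernel tests consume these helpers is unchanged. A hedged sketch of typical usage (the wrapped kernel and the tensor setup are illustrative assumptions):

#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "tests/NEON/Helper.h"

using namespace arm_compute;
using namespace arm_compute::test;

// Synthesize a runnable function around a bare kernel
using NEGEMMTranspose1xW = NESynthetizeFunction<NEGEMMTranspose1xWKernel>;

void transpose_example(Tensor &src, Tensor &dst)
{
    NEGEMMTranspose1xW transpose;
    transpose.configure(&src, &dst); // forwarded to NEGEMMTranspose1xWKernel::configure
    transpose.run();
}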
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/framework/Macros.h"
/*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "tests/Globals.h"
#include "tests/NEON/Accessor.h"
#include "tests/datasets/BorderModeDataset.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/PaddingCalculator.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/PaddingCalculator.h"