From dbdab85d6e0f96d3361a9e30310367d89953466c Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Fri, 23 Jun 2017 15:42:00 +0100 Subject: [PATCH] arm_compute v17.06 --- README.md | 6 +- SConscript | 199 + SConstruct | 166 +- arm_compute/core/AccessWindowAutoPadding.h | 6 +- arm_compute/core/AccessWindowStatic.h | 14 +- arm_compute/core/AccessWindowTranspose.h | 2 +- arm_compute/core/CL/CLHelpers.h | 61 + arm_compute/core/CL/CLKernels.h | 5 +- arm_compute/core/CL/CLTypes.h | 41 + arm_compute/core/CL/ICLHOG.h | 113 + arm_compute/core/CL/ICLKernel.h | 20 + arm_compute/core/CL/ICLMultiHOG.h | 56 + .../core/CL/kernels/CLActivationLayerKernel.h | 4 +- .../CL/kernels/CLBatchNormalizationLayerKernel.h | 77 + .../core/CL/kernels/CLDepthConcatenateKernel.h | 76 + .../core/CL/kernels/CLGEMMTranspose1xWKernel.h | 3 + .../core/CL/kernels/CLHOGDescriptorKernel.h | 105 + arm_compute/core/CL/kernels/CLHOGDetectorKernel.h | 82 + .../CLLocallyConnectedMatrixMultiplyKernel.h | 68 + .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 6 +- ...htsReshapeKernel.h => CLWeightsReshapeKernel.h} | 79 +- arm_compute/core/CPP/CPPKernels.h | 1 + ...CPPDetectionWindowNonMaximaSuppressionKernel.h} | 29 +- .../CPP/kernels/CPPSortEuclideanDistanceKernel.h | 6 +- arm_compute/core/Coordinates.h | 2 - arm_compute/core/Dimensions.h | 40 +- arm_compute/core/Error.h | 34 + arm_compute/core/FixedPoint.h | 217 + arm_compute/core/FixedPoint.inl | 252 + arm_compute/core/Helpers.h | 115 +- arm_compute/core/Helpers.inl | 72 +- arm_compute/core/IAccessWindow.h | 24 +- arm_compute/core/ITensor.h | 4 +- arm_compute/core/ITensorInfo.h | 195 + arm_compute/core/NEON/NEFixedPoint.h | 686 ++ arm_compute/core/NEON/NEFixedPoint.inl | 1018 ++ arm_compute/core/NEON/NEKernels.h | 8 +- arm_compute/core/NEON/NEMath.h | 135 +- arm_compute/core/NEON/NEMath.inl | 141 + .../core/NEON/kernels/NEAbsoluteDifferenceKernel.h | 4 +- .../core/NEON/kernels/NEActivationLayerKernel.h | 13 +- 
.../core/NEON/kernels/NEArithmeticAdditionKernel.h | 12 +- .../NEON/kernels/NEArithmeticSubtractionKernel.h | 6 +- .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 78 + arm_compute/core/NEON/kernels/NECol2ImKernel.h | 20 +- .../core/NEON/kernels/NEDepthConcatenateKernel.h | 76 + .../core/NEON/kernels/NEDepthConvertKernel.h | 14 +- .../NEDirectConvolutionLayerBiasAccumulateKernel.h | 74 + .../NEON/kernels/NEDirectConvolutionLayerKernel.h | 76 + arm_compute/core/NEON/kernels/NEFillBorderKernel.h | 2 +- .../core/NEON/kernels/NEFillInnerBorderKernel.h | 2 +- .../core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 4 +- .../kernels/NEGEMMMatrixAccumulateBiasesKernel.h | 2 +- .../core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 16 +- .../core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 2 +- .../core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 9 +- .../core/NEON/kernels/NEHOGDescriptorKernel.h | 2 +- .../core/NEON/kernels/NEHOGDetectorKernel.h | 1 + .../core/NEON/kernels/NEHarrisCornersKernel.h | 2 +- arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 8 +- .../NELocallyConnectedMatrixMultiplyKernel.h | 64 + .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 4 +- .../core/NEON/kernels/NENormalizationLayerKernel.h | 30 +- .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 21 +- .../core/NEON/kernels/NEPoolingLayerKernel.h | 22 +- arm_compute/core/NEON/kernels/NEScaleKernel.h | 4 +- .../core/NEON/kernels/NESoftmaxLayerKernel.h | 38 +- .../core/NEON/kernels/NETableLookupKernel.h | 2 +- arm_compute/core/NEON/kernels/NETransposeKernel.h | 4 +- ...htsReshapeKernel.h => NEWeightsReshapeKernel.h} | 43 +- arm_compute/core/PixelValue.h | 9 + arm_compute/core/Steps.h | 2 - arm_compute/core/Strides.h | 2 - arm_compute/core/SubTensorInfo.h | 184 + arm_compute/core/TensorInfo.h | 230 +- arm_compute/core/TensorShape.h | 71 +- arm_compute/core/Types.h | 68 +- arm_compute/core/Utils.h | 72 +- arm_compute/core/Validate.h | 258 +- arm_compute/core/Window.h | 24 +- arm_compute/core/Window.inl 
| 17 +- arm_compute/runtime/CL/CLFunctions.h | 7 + arm_compute/runtime/CL/CLHOG.h | 80 + arm_compute/runtime/CL/CLMultiHOG.h | 56 + arm_compute/runtime/CL/CLScheduler.h | 30 +- arm_compute/runtime/CL/CLSubTensor.h | 99 + .../CL/functions/CLBatchNormalizationLayer.h | 67 + .../runtime/CL/functions/CLConvolutionLayer.h | 77 +- .../runtime/CL/functions/CLDepthConcatenate.h | 69 + .../runtime/CL/functions/CLFullyConnectedLayer.h | 79 +- arm_compute/runtime/CL/functions/CLHOGDescriptor.h | 72 + arm_compute/runtime/CL/functions/CLHOGDetector.h | 78 + arm_compute/runtime/CL/functions/CLHOGGradient.h | 72 + .../runtime/CL/functions/CLHOGMultiDetection.h | 105 + .../runtime/CL/functions/CLLocallyConnectedLayer.h | 79 + arm_compute/runtime/CPP/CPPScheduler.h | 32 +- arm_compute/runtime/IFunction.h | 2 +- arm_compute/runtime/IScheduler.h | 55 + arm_compute/runtime/MultiHOG.h | 2 + arm_compute/runtime/NEON/NEFunctions.h | 4 + arm_compute/runtime/NEON/NEScheduler.h | 4 +- .../runtime/NEON/functions/NEActivationLayer.h | 2 +- .../runtime/NEON/functions/NEArithmeticAddition.h | 8 +- .../NEON/functions/NEArithmeticSubtraction.h | 8 +- .../NEON/functions/NEBatchNormalizationLayer.h | 66 + arm_compute/runtime/NEON/functions/NEConvolution.h | 2 +- .../runtime/NEON/functions/NEConvolutionLayer.h | 83 +- .../runtime/NEON/functions/NEDepthConcatenate.h | 66 + .../runtime/NEON/functions/NEDepthConvert.h | 15 +- .../NEON/functions/NEDirectConvolutionLayer.h | 72 + arm_compute/runtime/NEON/functions/NEFillBorder.h | 2 +- .../runtime/NEON/functions/NEFullyConnectedLayer.h | 77 +- arm_compute/runtime/NEON/functions/NEGEMM.h | 4 +- .../runtime/NEON/functions/NEGEMMInterleave4x4.h | 2 +- .../runtime/NEON/functions/NEGEMMTranspose1xW.h | 2 +- arm_compute/runtime/NEON/functions/NEGaussian5x5.h | 2 +- arm_compute/runtime/NEON/functions/NEHOGDetector.h | 2 + .../runtime/NEON/functions/NEHOGMultiDetection.h | 32 +- .../NEON/functions/NELocallyConnectedLayer.h | 79 + 
.../runtime/NEON/functions/NEMinMaxLocation.h | 2 +- .../NEON/functions/NENonMaximaSuppression3x3.h | 2 +- .../runtime/NEON/functions/NENormalizationLayer.h | 2 +- .../NEON/functions/NEPixelWiseMultiplication.h | 6 +- .../runtime/NEON/functions/NEPoolingLayer.h | 2 +- .../runtime/NEON/functions/NESoftmaxLayer.h | 3 +- arm_compute/runtime/NEON/functions/NETableLookup.h | 4 +- arm_compute/runtime/NEON/functions/NETranspose.h | 2 +- arm_compute/runtime/OMP/OMPScheduler.h | 68 + arm_compute/runtime/Scheduler.h | 77 + arm_compute/runtime/SingleThreadScheduler.h | 62 + arm_compute/runtime/SubTensor.h | 73 + arm_compute/runtime/Tensor.h | 8 +- arm_compute/runtime/Utils.h | 41 + docs/00_introduction.dox | 563 + docs/01_library.dox | 250 + docs/02_tests.dox | 95 + docs/Doxyfile | 19 +- docs/arm_compute.dox | 718 -- ...tils_8cpp.xhtml => 00__introduction_8dox.xhtml} | 22 +- ...__compute_8dox.xhtml => 01__library_8dox.xhtml} | 15 +- documentation/02__tests_8dox.xhtml | 132 + documentation/_absolute_difference_8cpp.xhtml | 151 + .../_absolute_difference_8cpp_source.xhtml | 177 + documentation/_access_window_auto_padding_8h.xhtml | 11 +- .../_access_window_auto_padding_8h_source.xhtml | 21 +- documentation/_access_window_static_8h.xhtml | 11 +- .../_access_window_static_8h_source.xhtml | 25 +- documentation/_access_window_transpose_8h.xhtml | 11 +- .../_access_window_transpose_8h_source.xhtml | 19 +- documentation/_accumulate_8cpp.xhtml | 151 + documentation/_accumulate_8cpp_source.xhtml | 175 + documentation/_accumulate_squared_8cpp.xhtml | 151 + .../_accumulate_squared_8cpp_source.xhtml | 176 + documentation/_accumulate_weighted_8cpp.xhtml | 151 + .../_accumulate_weighted_8cpp_source.xhtml | 175 + .../_activation_function_dataset_8h.xhtml | 153 + .../_activation_function_dataset_8h_source.xhtml | 149 + documentation/_activation_layer_8h.xhtml | 156 + documentation/_activation_layer_8h_source.xhtml | 150 + documentation/_activation_layer_dataset_8h.js | 8 + 
documentation/_activation_layer_dataset_8h.xhtml | 169 + .../_activation_layer_dataset_8h_source.xhtml | 153 + documentation/_arithmetic_addition_8cpp.xhtml | 151 + .../_arithmetic_addition_8cpp_source.xhtml | 182 + documentation/_arithmetic_subtraction_8cpp.xhtml | 151 + .../_arithmetic_subtraction_8cpp_source.xhtml | 182 + documentation/_array_8h.xhtml | 15 +- documentation/_array_8h_source.xhtml | 18 +- .../_batch_normalization_layer_8cpp.xhtml | 144 + .../_batch_normalization_layer_8cpp_source.xhtml | 167 + .../_batch_normalization_layer_dataset_8h.js | 6 + .../_batch_normalization_layer_dataset_8h.xhtml | 165 + ...tch_normalization_layer_dataset_8h_source.xhtml | 148 + documentation/_bitwise_not_8cpp.xhtml | 151 + documentation/_bitwise_not_8cpp_source.xhtml | 174 + documentation/_bitwise_or_8cpp.xhtml | 151 + documentation/_bitwise_or_8cpp_source.xhtml | 174 + documentation/_bitwise_xor_8cpp.xhtml | 151 + documentation/_bitwise_xor_8cpp_source.xhtml | 174 + documentation/_border_mode_dataset_8h.xhtml | 153 + documentation/_border_mode_dataset_8h_source.xhtml | 143 + documentation/_box3x3_8cpp.xhtml | 152 + documentation/_box3x3_8cpp_source.xhtml | 178 + documentation/_c_l_2_alex_net_8cpp.js | 4 + documentation/_c_l_2_alex_net_8cpp.xhtml | 179 + documentation/_c_l_2_alex_net_8cpp_source.xhtml | 171 + documentation/_c_l_2_depth_convert_8cpp.xhtml | 151 + .../_c_l_2_depth_convert_8cpp_source.xhtml | 184 + documentation/_c_l_2_fill_border_8cpp.xhtml | 151 + documentation/_c_l_2_fill_border_8cpp_source.xhtml | 172 + documentation/_c_l_2_g_e_m_m_8h.xhtml | 156 + documentation/_c_l_2_g_e_m_m_8h_source.xhtml | 154 + documentation/_c_l_2_helper_8h.js | 5 + documentation/_c_l_2_helper_8h.xhtml | 159 + documentation/_c_l_2_helper_8h_source.xhtml | 147 + documentation/_c_l_2_le_net5_8cpp.js | 4 + documentation/_c_l_2_le_net5_8cpp.xhtml | 177 + documentation/_c_l_2_le_net5_8cpp_source.xhtml | 166 + documentation/_c_l_2_threshold_8cpp.xhtml | 152 + 
documentation/_c_l_2_threshold_8cpp_source.xhtml | 178 + documentation/_c_l_absolute_difference_8h.xhtml | 9 +- .../_c_l_absolute_difference_8h_source.xhtml | 11 +- .../_c_l_absolute_difference_kernel_8h.xhtml | 9 +- ..._c_l_absolute_difference_kernel_8h_source.xhtml | 13 +- documentation/_c_l_accessor_8h.xhtml | 155 + documentation/_c_l_accessor_8h_source.xhtml | 167 + documentation/_c_l_accumulate_8h.xhtml | 9 +- documentation/_c_l_accumulate_8h_source.xhtml | 11 +- documentation/_c_l_accumulate_kernel_8h.xhtml | 9 +- .../_c_l_accumulate_kernel_8h_source.xhtml | 11 +- documentation/_c_l_activation_layer_8h.xhtml | 11 +- .../_c_l_activation_layer_8h_source.xhtml | 17 +- .../_c_l_activation_layer_kernel_8h.xhtml | 11 +- .../_c_l_activation_layer_kernel_8h_source.xhtml | 17 +- documentation/_c_l_arithmetic_addition_8h.xhtml | 11 +- .../_c_l_arithmetic_addition_8h_source.xhtml | 17 +- .../_c_l_arithmetic_addition_kernel_8h.xhtml | 11 +- ..._c_l_arithmetic_addition_kernel_8h_source.xhtml | 19 +- documentation/_c_l_arithmetic_subtraction_8h.xhtml | 11 +- .../_c_l_arithmetic_subtraction_8h_source.xhtml | 17 +- .../_c_l_arithmetic_subtraction_kernel_8h.xhtml | 11 +- ...l_arithmetic_subtraction_kernel_8h_source.xhtml | 17 +- documentation/_c_l_array_8h.xhtml | 13 +- documentation/_c_l_array_8h_source.xhtml | 19 +- .../_c_l_batch_normalization_layer_8h.xhtml | 152 + .../_c_l_batch_normalization_layer_8h_source.xhtml | 144 + .../_c_l_batch_normalization_layer_kernel_8h.xhtml | 150 + ...atch_normalization_layer_kernel_8h_source.xhtml | 145 + documentation/_c_l_bitwise_and_8h.xhtml | 9 +- documentation/_c_l_bitwise_and_8h_source.xhtml | 11 +- documentation/_c_l_bitwise_and_kernel_8h.xhtml | 9 +- .../_c_l_bitwise_and_kernel_8h_source.xhtml | 13 +- documentation/_c_l_bitwise_not_8h.xhtml | 9 +- documentation/_c_l_bitwise_not_8h_source.xhtml | 11 +- documentation/_c_l_bitwise_not_kernel_8h.xhtml | 9 +- .../_c_l_bitwise_not_kernel_8h_source.xhtml | 11 +- 
documentation/_c_l_bitwise_or_8h.xhtml | 9 +- documentation/_c_l_bitwise_or_8h_source.xhtml | 11 +- documentation/_c_l_bitwise_or_kernel_8h.xhtml | 9 +- .../_c_l_bitwise_or_kernel_8h_source.xhtml | 13 +- documentation/_c_l_bitwise_xor_8h.xhtml | 9 +- documentation/_c_l_bitwise_xor_8h_source.xhtml | 11 +- documentation/_c_l_bitwise_xor_kernel_8h.xhtml | 9 +- .../_c_l_bitwise_xor_kernel_8h_source.xhtml | 13 +- documentation/_c_l_box3x3_8h.xhtml | 11 +- documentation/_c_l_box3x3_8h_source.xhtml | 15 +- documentation/_c_l_box3x3_kernel_8h.xhtml | 9 +- documentation/_c_l_box3x3_kernel_8h_source.xhtml | 13 +- documentation/_c_l_canny_edge_8h.xhtml | 9 +- documentation/_c_l_canny_edge_8h_source.xhtml | 13 +- documentation/_c_l_canny_edge_kernel_8h.xhtml | 9 +- .../_c_l_canny_edge_kernel_8h_source.xhtml | 15 +- documentation/_c_l_channel_combine_8h.xhtml | 9 +- documentation/_c_l_channel_combine_8h_source.xhtml | 11 +- documentation/_c_l_channel_combine_kernel_8h.xhtml | 9 +- .../_c_l_channel_combine_kernel_8h_source.xhtml | 13 +- documentation/_c_l_channel_extract_8h.xhtml | 11 +- documentation/_c_l_channel_extract_8h_source.xhtml | 17 +- documentation/_c_l_channel_extract_kernel_8h.xhtml | 11 +- .../_c_l_channel_extract_kernel_8h_source.xhtml | 19 +- documentation/_c_l_col2_im_kernel_8h.xhtml | 9 +- documentation/_c_l_col2_im_kernel_8h_source.xhtml | 13 +- documentation/_c_l_color_convert_8h.xhtml | 9 +- documentation/_c_l_color_convert_8h_source.xhtml | 11 +- documentation/_c_l_color_convert_kernel_8h.xhtml | 9 +- .../_c_l_color_convert_kernel_8h_source.xhtml | 13 +- documentation/_c_l_convolution_8h.xhtml | 11 +- documentation/_c_l_convolution_8h_source.xhtml | 15 +- documentation/_c_l_convolution_kernel_8h.xhtml | 9 +- .../_c_l_convolution_kernel_8h_source.xhtml | 17 +- documentation/_c_l_convolution_layer_8h.xhtml | 16 +- .../_c_l_convolution_layer_8h_source.xhtml | 31 +- documentation/_c_l_depth_concatenate_8h.xhtml | 155 + .../_c_l_depth_concatenate_8h_source.xhtml 
| 144 + .../_c_l_depth_concatenate_kernel_8h.xhtml | 151 + .../_c_l_depth_concatenate_kernel_8h_source.xhtml | 148 + documentation/_c_l_depth_convert_8h.xhtml | 11 +- documentation/_c_l_depth_convert_8h_source.xhtml | 17 +- documentation/_c_l_depth_convert_kernel_8h.xhtml | 11 +- .../_c_l_depth_convert_kernel_8h_source.xhtml | 17 +- documentation/_c_l_derivative_8h.xhtml | 11 +- documentation/_c_l_derivative_8h_source.xhtml | 15 +- documentation/_c_l_derivative_kernel_8h.xhtml | 9 +- .../_c_l_derivative_kernel_8h_source.xhtml | 15 +- documentation/_c_l_dilate_8h.xhtml | 11 +- documentation/_c_l_dilate_8h_source.xhtml | 15 +- documentation/_c_l_dilate_kernel_8h.xhtml | 9 +- documentation/_c_l_dilate_kernel_8h_source.xhtml | 13 +- documentation/_c_l_distribution1_d_8h.xhtml | 9 +- documentation/_c_l_distribution1_d_8h_source.xhtml | 11 +- documentation/_c_l_equalize_histogram_8h.xhtml | 9 +- .../_c_l_equalize_histogram_8h_source.xhtml | 11 +- documentation/_c_l_erode_8h.xhtml | 11 +- documentation/_c_l_erode_8h_source.xhtml | 15 +- documentation/_c_l_erode_kernel_8h.xhtml | 9 +- documentation/_c_l_erode_kernel_8h_source.xhtml | 13 +- documentation/_c_l_fast_corners_8h.xhtml | 11 +- documentation/_c_l_fast_corners_8h_source.xhtml | 16 +- documentation/_c_l_fast_corners_kernel_8h.xhtml | 11 +- .../_c_l_fast_corners_kernel_8h_source.xhtml | 20 +- documentation/_c_l_fill_border_8h.xhtml | 11 +- documentation/_c_l_fill_border_8h_source.xhtml | 15 +- documentation/_c_l_fill_border_kernel_8h.xhtml | 11 +- .../_c_l_fill_border_kernel_8h_source.xhtml | 19 +- documentation/_c_l_fixture_8cpp.js | 4 + documentation/_c_l_fixture_8cpp.xhtml | 161 + documentation/_c_l_fixture_8cpp_source.xhtml | 140 + documentation/_c_l_fixture_8h.xhtml | 155 + documentation/_c_l_fixture_8h_source.xhtml | 139 + documentation/_c_l_fully_connected_layer_8h.xhtml | 12 +- .../_c_l_fully_connected_layer_8h_source.xhtml | 22 +- documentation/_c_l_functions_8h.xhtml | 16 +- 
documentation/_c_l_functions_8h_source.xhtml | 18 +- documentation/_c_l_g_e_m_m_8h.xhtml | 9 +- documentation/_c_l_g_e_m_m_8h_source.xhtml | 11 +- documentation/_c_l_g_e_m_m_interleave4x4_8h.xhtml | 9 +- .../_c_l_g_e_m_m_interleave4x4_8h_source.xhtml | 11 +- .../_c_l_g_e_m_m_interleave4x4_kernel_8h.xhtml | 9 +- ..._l_g_e_m_m_interleave4x4_kernel_8h_source.xhtml | 13 +- documentation/_c_l_g_e_m_m_lowp_8h.xhtml | 9 +- documentation/_c_l_g_e_m_m_lowp_8h_source.xhtml | 11 +- ..._l_g_e_m_m_lowp_matrix_multiply_kernel_8h.xhtml | 9 +- ...m_m_lowp_matrix_multiply_kernel_8h_source.xhtml | 13 +- ..._e_m_m_matrix_accumulate_biases_kernel_8h.xhtml | 9 +- ...matrix_accumulate_biases_kernel_8h_source.xhtml | 13 +- .../_c_l_g_e_m_m_matrix_addition_kernel_8h.xhtml | 9 +- ..._g_e_m_m_matrix_addition_kernel_8h_source.xhtml | 13 +- .../_c_l_g_e_m_m_matrix_multiply_kernel_8h.xhtml | 9 +- ..._g_e_m_m_matrix_multiply_kernel_8h_source.xhtml | 13 +- .../_c_l_g_e_m_m_transpose1x_w_kernel_8h.xhtml | 9 +- ..._l_g_e_m_m_transpose1x_w_kernel_8h_source.xhtml | 16 +- documentation/_c_l_gaussian3x3_8h.xhtml | 11 +- documentation/_c_l_gaussian3x3_8h_source.xhtml | 15 +- documentation/_c_l_gaussian3x3_kernel_8h.xhtml | 9 +- .../_c_l_gaussian3x3_kernel_8h_source.xhtml | 13 +- documentation/_c_l_gaussian5x5_8h.xhtml | 11 +- documentation/_c_l_gaussian5x5_8h_source.xhtml | 15 +- documentation/_c_l_gaussian5x5_kernel_8h.xhtml | 9 +- .../_c_l_gaussian5x5_kernel_8h_source.xhtml | 11 +- documentation/_c_l_gaussian_pyramid_8h.xhtml | 11 +- .../_c_l_gaussian_pyramid_8h_source.xhtml | 17 +- .../_c_l_gaussian_pyramid_kernel_8h.xhtml | 9 +- .../_c_l_gaussian_pyramid_kernel_8h_source.xhtml | 13 +- documentation/_c_l_h_o_g_8h.xhtml | 154 + documentation/_c_l_h_o_g_8h_source.xhtml | 149 + documentation/_c_l_h_o_g_descriptor_8h.xhtml | 154 + .../_c_l_h_o_g_descriptor_8h_source.xhtml | 150 + .../_c_l_h_o_g_descriptor_kernel_8h.xhtml | 155 + .../_c_l_h_o_g_descriptor_kernel_8h_source.xhtml | 149 + 
documentation/_c_l_h_o_g_detector_8h.xhtml | 153 + documentation/_c_l_h_o_g_detector_8h_source.xhtml | 150 + documentation/_c_l_h_o_g_detector_kernel_8h.xhtml | 155 + .../_c_l_h_o_g_detector_kernel_8h_source.xhtml | 147 + documentation/_c_l_h_o_g_gradient_8h.xhtml | 156 + documentation/_c_l_h_o_g_gradient_8h_source.xhtml | 150 + documentation/_c_l_h_o_g_multi_detection_8h.xhtml | 157 + .../_c_l_h_o_g_multi_detection_8h_source.xhtml | 154 + documentation/_c_l_harris_corners_8h.xhtml | 11 +- documentation/_c_l_harris_corners_8h_source.xhtml | 16 +- documentation/_c_l_harris_corners_kernel_8h.xhtml | 9 +- .../_c_l_harris_corners_kernel_8h_source.xhtml | 15 +- documentation/_c_l_helpers_8h.js | 8 +- documentation/_c_l_helpers_8h.xhtml | 37 +- documentation/_c_l_helpers_8h_source.xhtml | 26 +- documentation/_c_l_histogram_8h.xhtml | 9 +- documentation/_c_l_histogram_8h_source.xhtml | 11 +- documentation/_c_l_histogram_kernel_8h.xhtml | 9 +- .../_c_l_histogram_kernel_8h_source.xhtml | 13 +- documentation/_c_l_im2_col_kernel_8h.xhtml | 9 +- documentation/_c_l_im2_col_kernel_8h_source.xhtml | 15 +- documentation/_c_l_integral_image_8h.xhtml | 9 +- documentation/_c_l_integral_image_8h_source.xhtml | 11 +- documentation/_c_l_integral_image_kernel_8h.xhtml | 9 +- .../_c_l_integral_image_kernel_8h_source.xhtml | 13 +- documentation/_c_l_kernel_library_8h.xhtml | 9 +- documentation/_c_l_kernel_library_8h_source.xhtml | 14 +- documentation/_c_l_kernels_8h.xhtml | 14 +- documentation/_c_l_kernels_8h_source.xhtml | 16 +- documentation/_c_l_l_k_tracker_kernel_8h.xhtml | 11 +- .../_c_l_l_k_tracker_kernel_8h_source.xhtml | 19 +- documentation/_c_l_laplacian_pyramid_8h.xhtml | 11 +- .../_c_l_laplacian_pyramid_8h_source.xhtml | 15 +- documentation/_c_l_laplacian_reconstruct_8h.xhtml | 11 +- .../_c_l_laplacian_reconstruct_8h_source.xhtml | 15 +- .../_c_l_locally_connected_layer_8h.xhtml | 156 + .../_c_l_locally_connected_layer_8h_source.xhtml | 153 + 
...ally_connected_matrix_multiply_kernel_8h.xhtml} | 21 +- ...nnected_matrix_multiply_kernel_8h_source.xhtml} | 31 +- documentation/_c_l_lut_8h.xhtml | 11 +- documentation/_c_l_lut_8h_source.xhtml | 17 +- documentation/_c_l_lut_allocator_8h.xhtml | 9 +- documentation/_c_l_lut_allocator_8h_source.xhtml | 11 +- documentation/_c_l_magnitude_8h.xhtml | 11 +- documentation/_c_l_magnitude_8h_source.xhtml | 17 +- documentation/_c_l_magnitude_phase_kernel_8h.xhtml | 11 +- .../_c_l_magnitude_phase_kernel_8h_source.xhtml | 21 +- documentation/_c_l_mean_std_dev_8h.xhtml | 9 +- documentation/_c_l_mean_std_dev_8h_source.xhtml | 11 +- documentation/_c_l_mean_std_dev_kernel_8h.xhtml | 9 +- .../_c_l_mean_std_dev_kernel_8h_source.xhtml | 13 +- documentation/_c_l_median3x3_8h.xhtml | 11 +- documentation/_c_l_median3x3_8h_source.xhtml | 15 +- documentation/_c_l_median3x3_kernel_8h.xhtml | 9 +- .../_c_l_median3x3_kernel_8h_source.xhtml | 13 +- documentation/_c_l_min_max_location_8h.xhtml | 9 +- .../_c_l_min_max_location_8h_source.xhtml | 15 +- .../_c_l_min_max_location_kernel_8h.xhtml | 9 +- .../_c_l_min_max_location_kernel_8h_source.xhtml | 13 +- documentation/_c_l_multi_h_o_g_8h.xhtml | 153 + documentation/_c_l_multi_h_o_g_8h_source.xhtml | 142 + documentation/_c_l_multi_image_8h.xhtml | 11 +- documentation/_c_l_multi_image_8h_source.xhtml | 17 +- documentation/_c_l_non_linear_filter_8h.xhtml | 11 +- .../_c_l_non_linear_filter_8h_source.xhtml | 19 +- .../_c_l_non_linear_filter_kernel_8h.xhtml | 11 +- .../_c_l_non_linear_filter_kernel_8h_source.xhtml | 21 +- .../_c_l_non_maxima_suppression3x3_8h.xhtml | 11 +- .../_c_l_non_maxima_suppression3x3_8h_source.xhtml | 15 +- .../_c_l_non_maxima_suppression3x3_kernel_8h.xhtml | 9 +- ...on_maxima_suppression3x3_kernel_8h_source.xhtml | 13 +- documentation/_c_l_normalization_layer_8h.xhtml | 11 +- .../_c_l_normalization_layer_8h_source.xhtml | 17 +- .../_c_l_normalization_layer_kernel_8h.xhtml | 9 +- 
..._c_l_normalization_layer_kernel_8h_source.xhtml | 17 +- documentation/_c_l_optical_flow_8h.xhtml | 11 +- documentation/_c_l_optical_flow_8h_source.xhtml | 17 +- documentation/_c_l_phase_8h.xhtml | 11 +- documentation/_c_l_phase_8h_source.xhtml | 17 +- .../_c_l_pixel_wise_multiplication_8h.xhtml | 11 +- .../_c_l_pixel_wise_multiplication_8h_source.xhtml | 19 +- .../_c_l_pixel_wise_multiplication_kernel_8h.xhtml | 11 +- ...ixel_wise_multiplication_kernel_8h_source.xhtml | 19 +- documentation/_c_l_pooling_layer_8h.xhtml | 11 +- documentation/_c_l_pooling_layer_8h_source.xhtml | 17 +- documentation/_c_l_pooling_layer_kernel_8h.xhtml | 9 +- .../_c_l_pooling_layer_kernel_8h_source.xhtml | 17 +- documentation/_c_l_pyramid_8h.xhtml | 11 +- documentation/_c_l_pyramid_8h_source.xhtml | 15 +- documentation/_c_l_remap_8h.xhtml | 11 +- documentation/_c_l_remap_8h_source.xhtml | 17 +- documentation/_c_l_remap_kernel_8h.xhtml | 11 +- documentation/_c_l_remap_kernel_8h_source.xhtml | 19 +- documentation/_c_l_scale_8h.xhtml | 11 +- documentation/_c_l_scale_8h_source.xhtml | 17 +- documentation/_c_l_scale_kernel_8h.xhtml | 11 +- documentation/_c_l_scale_kernel_8h_source.xhtml | 19 +- documentation/_c_l_scharr3x3_8h.xhtml | 11 +- documentation/_c_l_scharr3x3_8h_source.xhtml | 15 +- documentation/_c_l_scharr3x3_kernel_8h.xhtml | 9 +- .../_c_l_scharr3x3_kernel_8h_source.xhtml | 15 +- documentation/_c_l_scheduler_8h.xhtml | 15 +- documentation/_c_l_scheduler_8h_source.xhtml | 41 +- documentation/_c_l_sobel3x3_8h.xhtml | 11 +- documentation/_c_l_sobel3x3_8h_source.xhtml | 15 +- documentation/_c_l_sobel3x3_kernel_8h.xhtml | 9 +- documentation/_c_l_sobel3x3_kernel_8h_source.xhtml | 15 +- documentation/_c_l_sobel5x5_8h.xhtml | 11 +- documentation/_c_l_sobel5x5_8h_source.xhtml | 15 +- documentation/_c_l_sobel5x5_kernel_8h.xhtml | 9 +- documentation/_c_l_sobel5x5_kernel_8h_source.xhtml | 15 +- documentation/_c_l_sobel7x7_8h.xhtml | 11 +- documentation/_c_l_sobel7x7_8h_source.xhtml | 15 +- 
documentation/_c_l_sobel7x7_kernel_8h.xhtml | 9 +- documentation/_c_l_sobel7x7_kernel_8h_source.xhtml | 15 +- documentation/_c_l_softmax_layer_8h.xhtml | 9 +- documentation/_c_l_softmax_layer_8h_source.xhtml | 11 +- documentation/_c_l_softmax_layer_kernel_8h.xhtml | 9 +- .../_c_l_softmax_layer_kernel_8h_source.xhtml | 16 +- documentation/_c_l_sub_tensor_8h.xhtml | 152 + documentation/_c_l_sub_tensor_8h_source.xhtml | 151 + documentation/_c_l_table_lookup_8h.xhtml | 9 +- documentation/_c_l_table_lookup_8h_source.xhtml | 11 +- documentation/_c_l_table_lookup_kernel_8h.xhtml | 9 +- .../_c_l_table_lookup_kernel_8h_source.xhtml | 11 +- documentation/_c_l_tensor_8h.xhtml | 9 +- documentation/_c_l_tensor_8h_source.xhtml | 13 +- documentation/_c_l_tensor_allocator_8h.xhtml | 9 +- .../_c_l_tensor_allocator_8h_source.xhtml | 11 +- documentation/_c_l_threshold_8h.xhtml | 11 +- documentation/_c_l_threshold_8h_source.xhtml | 18 +- documentation/_c_l_threshold_kernel_8h.xhtml | 11 +- .../_c_l_threshold_kernel_8h_source.xhtml | 18 +- documentation/_c_l_transpose_8h.xhtml | 9 +- documentation/_c_l_transpose_8h_source.xhtml | 11 +- documentation/_c_l_transpose_kernel_8h.xhtml | 9 +- .../_c_l_transpose_kernel_8h_source.xhtml | 11 +- documentation/_c_l_types_8h.js | 12 + documentation/_c_l_types_8h.xhtml | 159 + documentation/_c_l_types_8h_source.xhtml | 141 + documentation/_c_l_warp_affine_8h.xhtml | 11 +- documentation/_c_l_warp_affine_8h_source.xhtml | 17 +- documentation/_c_l_warp_affine_kernel_8h.xhtml | 11 +- .../_c_l_warp_affine_kernel_8h_source.xhtml | 19 +- documentation/_c_l_warp_perspective_8h.xhtml | 11 +- .../_c_l_warp_perspective_8h_source.xhtml | 17 +- .../_c_l_warp_perspective_kernel_8h.xhtml | 11 +- .../_c_l_warp_perspective_kernel_8h_source.xhtml | 19 +- documentation/_c_l_weights_reshape_kernel_8h.xhtml | 155 + .../_c_l_weights_reshape_kernel_8h_source.xhtml | 146 + .../_c_p_p_corner_candidates_kernel_8h.xhtml | 9 +- 
..._c_p_p_corner_candidates_kernel_8h_source.xhtml | 11 +- ..._window_non_maxima_suppression_kernel_8h.xhtml} | 23 +- ...w_non_maxima_suppression_kernel_8h_source.xhtml | 147 + documentation/_c_p_p_kernels_8h.xhtml | 10 +- documentation/_c_p_p_kernels_8h_source.xhtml | 12 +- documentation/_c_p_p_scheduler_8h.xhtml | 13 +- documentation/_c_p_p_scheduler_8h_source.xhtml | 23 +- .../_c_p_p_sort_euclidean_distance_kernel_8h.xhtml | 13 +- ..._sort_euclidean_distance_kernel_8h_source.xhtml | 15 +- documentation/_convert_policy_dataset_8h.xhtml | 153 + .../_convert_policy_dataset_8h_source.xhtml | 142 + documentation/_convolution_layer_8h.xhtml | 156 + documentation/_convolution_layer_8h_source.xhtml | 152 + documentation/_convolution_layer_dataset_8h.js | 11 + documentation/_convolution_layer_dataset_8h.xhtml | 182 + .../_convolution_layer_dataset_8h_source.xhtml | 162 + documentation/_coordinates_8h.xhtml | 9 +- documentation/_coordinates_8h_source.xhtml | 13 +- documentation/_data_type_datasets_8h.xhtml | 176 + documentation/_data_type_datasets_8h_source.xhtml | 170 + documentation/_dimensions_8h.xhtml | 9 +- documentation/_dimensions_8h_source.xhtml | 43 +- documentation/_distribution1_d_8h.xhtml | 9 +- documentation/_distribution1_d_8h_source.xhtml | 11 +- documentation/_error_8h.js | 3 + documentation/_error_8h.xhtml | 84 +- documentation/_error_8h_source.xhtml | 12 +- documentation/_exp___q_s8_8cpp.xhtml | 151 + documentation/_exp___q_s8_8cpp_source.xhtml | 176 + documentation/_fixed_point_8cpp.xhtml | 142 + documentation/_fixed_point_8cpp_source.xhtml | 149 + documentation/_fixed_point_8inl.js | 23 + documentation/_fixed_point_8inl.xhtml | 207 + documentation/_fixed_point_8inl_source.xhtml | 160 + documentation/_fully_connected_layer_8h.xhtml | 157 + .../_fully_connected_layer_8h_source.xhtml | 152 + documentation/_fully_connected_layer_dataset_8h.js | 10 + .../_fully_connected_layer_dataset_8h.xhtml | 172 + .../_fully_connected_layer_dataset_8h_source.xhtml | 158 
+ documentation/_g_e_m_m_dataset_8h.xhtml | 166 + documentation/_g_e_m_m_dataset_8h_source.xhtml | 157 + documentation/_generic_dataset_8h.xhtml | 154 + documentation/_generic_dataset_8h_source.xhtml | 141 + documentation/_globals_8h.xhtml | 146 + documentation/_globals_8h_source.xhtml | 135 + documentation/_h_o_g_8h.xhtml | 11 +- documentation/_h_o_g_8h_source.xhtml | 15 +- documentation/_h_o_g_info_8h.xhtml | 11 +- documentation/_h_o_g_info_8h_source.xhtml | 19 +- documentation/_helpers_8h.js | 11 - documentation/_helpers_8h.xhtml | 194 - documentation/_helpers_8h_source.xhtml | 166 - documentation/_helpers_8inl.js | 13 + documentation/_helpers_8inl.xhtml | 47 +- documentation/_helpers_8inl_source.xhtml | 66 +- documentation/_i_access_window_8h.xhtml | 11 +- documentation/_i_access_window_8h_source.xhtml | 31 +- documentation/_i_accessor_8h.xhtml | 153 + documentation/_i_accessor_8h_source.xhtml | 150 + documentation/_i_array_8h.xhtml | 11 +- documentation/_i_array_8h_source.xhtml | 16 +- documentation/_i_c_l_array_8h.xhtml | 11 +- documentation/_i_c_l_array_8h_source.xhtml | 11 +- documentation/_i_c_l_distribution1_d_8h.xhtml | 9 +- .../_i_c_l_distribution1_d_8h_source.xhtml | 11 +- documentation/_i_c_l_h_o_g_8h.xhtml | 153 + documentation/_i_c_l_h_o_g_8h_source.xhtml | 137 + documentation/_i_c_l_kernel_8h.xhtml | 12 +- documentation/_i_c_l_kernel_8h_source.xhtml | 21 +- documentation/_i_c_l_lut_8h.xhtml | 9 +- documentation/_i_c_l_lut_8h_source.xhtml | 11 +- documentation/_i_c_l_multi_h_o_g_8h.xhtml | 151 + documentation/_i_c_l_multi_h_o_g_8h_source.xhtml | 141 + documentation/_i_c_l_multi_image_8h.xhtml | 9 +- documentation/_i_c_l_multi_image_8h_source.xhtml | 11 +- documentation/_i_c_l_simple2_d_kernel_8h.xhtml | 9 +- .../_i_c_l_simple2_d_kernel_8h_source.xhtml | 11 +- documentation/_i_c_l_simple3_d_kernel_8h.xhtml | 9 +- .../_i_c_l_simple3_d_kernel_8h_source.xhtml | 11 +- documentation/_i_c_l_simple_function_8h.xhtml | 9 +- 
.../_i_c_l_simple_function_8h_source.xhtml | 11 +- documentation/_i_c_l_simple_kernel_8h.xhtml | 11 +- documentation/_i_c_l_simple_kernel_8h_source.xhtml | 19 +- documentation/_i_c_l_tensor_8h.xhtml | 9 +- documentation/_i_c_l_tensor_8h_source.xhtml | 11 +- documentation/_i_c_p_p_kernel_8h.xhtml | 9 +- documentation/_i_c_p_p_kernel_8h_source.xhtml | 11 +- documentation/_i_c_p_p_simple_kernel_8h.xhtml | 9 +- .../_i_c_p_p_simple_kernel_8h_source.xhtml | 13 +- documentation/_i_distribution1_d_8h.xhtml | 9 +- documentation/_i_distribution1_d_8h_source.xhtml | 11 +- documentation/_i_distribution_8h.xhtml | 9 +- documentation/_i_distribution_8h_source.xhtml | 11 +- documentation/_i_function_8h.xhtml | 9 +- documentation/_i_function_8h_source.xhtml | 11 +- documentation/_i_h_o_g_8h.xhtml | 11 +- documentation/_i_h_o_g_8h_source.xhtml | 15 +- documentation/_i_kernel_8h.xhtml | 11 +- documentation/_i_kernel_8h_source.xhtml | 17 +- documentation/_i_lut_8h.xhtml | 11 +- documentation/_i_lut_8h_source.xhtml | 17 +- documentation/_i_lut_allocator_8h.xhtml | 11 +- documentation/_i_lut_allocator_8h_source.xhtml | 17 +- documentation/_i_multi_h_o_g_8h.xhtml | 9 +- documentation/_i_multi_h_o_g_8h_source.xhtml | 11 +- documentation/_i_multi_image_8h.xhtml | 9 +- documentation/_i_multi_image_8h_source.xhtml | 11 +- documentation/_i_n_e_kernel_8h.xhtml | 9 +- documentation/_i_n_e_kernel_8h_source.xhtml | 11 +- documentation/_i_n_e_simple_function_8h.xhtml | 9 +- .../_i_n_e_simple_function_8h_source.xhtml | 11 +- documentation/_i_n_e_simple_kernel_8h.xhtml | 9 +- documentation/_i_n_e_simple_kernel_8h_source.xhtml | 11 +- documentation/_i_pyramid_8h.xhtml | 11 +- documentation/_i_pyramid_8h_source.xhtml | 15 +- documentation/_i_scheduler_8h.xhtml | 149 + documentation/_i_scheduler_8h_source.xhtml | 139 + documentation/_i_tensor_8h.xhtml | 9 +- documentation/_i_tensor_8h_source.xhtml | 19 +- documentation/_i_tensor_allocator_8h.xhtml | 11 +- 
documentation/_i_tensor_allocator_8h_source.xhtml | 17 +- documentation/_i_tensor_info_8h.xhtml | 155 + documentation/_i_tensor_info_8h_source.xhtml | 172 + documentation/_image_datasets_8h.xhtml | 159 + documentation/_image_datasets_8h_source.xhtml | 144 + documentation/_instrument_8h.xhtml | 162 + documentation/_instrument_8h_source.xhtml | 146 + documentation/_integral_image_8cpp.xhtml | 151 + documentation/_integral_image_8cpp_source.xhtml | 176 + .../_interpolation_policy_dataset_8h.xhtml | 152 + .../_interpolation_policy_dataset_8h_source.xhtml | 143 + documentation/_invsqrt___q_s8_8cpp.xhtml | 151 + documentation/_invsqrt___q_s8_8cpp_source.xhtml | 177 + documentation/_log___q_s8_8cpp.xhtml | 151 + documentation/_log___q_s8_8cpp_source.xhtml | 176 + documentation/_lut_8h.xhtml | 11 +- documentation/_lut_8h_source.xhtml | 17 +- documentation/_lut_allocator_8h.xhtml | 9 +- documentation/_lut_allocator_8h_source.xhtml | 11 +- documentation/_multi_h_o_g_8h.xhtml | 14 +- documentation/_multi_h_o_g_8h_source.xhtml | 19 +- documentation/_multi_image_8h.xhtml | 13 +- documentation/_multi_image_8h_source.xhtml | 21 +- documentation/_multi_image_info_8h.xhtml | 11 +- documentation/_multi_image_info_8h_source.xhtml | 17 +- documentation/_n_e_absolute_difference_8h.xhtml | 9 +- .../_n_e_absolute_difference_8h_source.xhtml | 11 +- .../_n_e_absolute_difference_kernel_8h.xhtml | 9 +- ..._n_e_absolute_difference_kernel_8h_source.xhtml | 11 +- documentation/_n_e_accessor_8h.xhtml | 155 + documentation/_n_e_accessor_8h_source.xhtml | 163 + documentation/_n_e_accumulate_8h.xhtml | 9 +- documentation/_n_e_accumulate_8h_source.xhtml | 11 +- documentation/_n_e_accumulate_kernel_8h.xhtml | 9 +- .../_n_e_accumulate_kernel_8h_source.xhtml | 11 +- documentation/_n_e_activation_layer_8h.xhtml | 11 +- .../_n_e_activation_layer_8h_source.xhtml | 17 +- .../_n_e_activation_layer_kernel_8h.xhtml | 12 +- .../_n_e_activation_layer_kernel_8h_source.xhtml | 20 +- 
documentation/_n_e_arithmetic_addition_8h.xhtml | 11 +- .../_n_e_arithmetic_addition_8h_source.xhtml | 19 +- .../_n_e_arithmetic_addition_kernel_8h.xhtml | 11 +- ..._n_e_arithmetic_addition_kernel_8h_source.xhtml | 17 +- documentation/_n_e_arithmetic_subtraction_8h.xhtml | 11 +- .../_n_e_arithmetic_subtraction_8h_source.xhtml | 19 +- .../_n_e_arithmetic_subtraction_kernel_8h.xhtml | 11 +- ...e_arithmetic_subtraction_kernel_8h_source.xhtml | 17 +- .../_n_e_batch_normalization_layer_8h.xhtml | 152 + .../_n_e_batch_normalization_layer_8h_source.xhtml | 143 + .../_n_e_batch_normalization_layer_kernel_8h.xhtml | 150 + ...atch_normalization_layer_kernel_8h_source.xhtml | 144 + documentation/_n_e_bitwise_and_8h.xhtml | 9 +- documentation/_n_e_bitwise_and_8h_source.xhtml | 11 +- documentation/_n_e_bitwise_and_kernel_8h.xhtml | 9 +- .../_n_e_bitwise_and_kernel_8h_source.xhtml | 11 +- documentation/_n_e_bitwise_not_8h.xhtml | 9 +- documentation/_n_e_bitwise_not_8h_source.xhtml | 11 +- documentation/_n_e_bitwise_not_kernel_8h.xhtml | 9 +- .../_n_e_bitwise_not_kernel_8h_source.xhtml | 11 +- documentation/_n_e_bitwise_or_8h.xhtml | 9 +- documentation/_n_e_bitwise_or_8h_source.xhtml | 11 +- documentation/_n_e_bitwise_or_kernel_8h.xhtml | 9 +- .../_n_e_bitwise_or_kernel_8h_source.xhtml | 11 +- documentation/_n_e_bitwise_xor_8h.xhtml | 9 +- documentation/_n_e_bitwise_xor_8h_source.xhtml | 11 +- documentation/_n_e_bitwise_xor_kernel_8h.xhtml | 9 +- .../_n_e_bitwise_xor_kernel_8h_source.xhtml | 11 +- documentation/_n_e_box3x3_8h.xhtml | 11 +- documentation/_n_e_box3x3_8h_source.xhtml | 15 +- documentation/_n_e_box3x3_kernel_8h.xhtml | 9 +- documentation/_n_e_box3x3_kernel_8h_source.xhtml | 13 +- documentation/_n_e_canny_edge_8h.xhtml | 13 +- documentation/_n_e_canny_edge_8h_source.xhtml | 19 +- documentation/_n_e_canny_edge_kernel_8h.xhtml | 9 +- .../_n_e_canny_edge_kernel_8h_source.xhtml | 13 +- documentation/_n_e_channel_combine_8h.xhtml | 9 +- 
documentation/_n_e_channel_combine_8h_source.xhtml | 11 +- documentation/_n_e_channel_combine_kernel_8h.xhtml | 9 +- .../_n_e_channel_combine_kernel_8h_source.xhtml | 11 +- documentation/_n_e_channel_extract_8h.xhtml | 11 +- documentation/_n_e_channel_extract_8h_source.xhtml | 17 +- documentation/_n_e_channel_extract_kernel_8h.xhtml | 11 +- .../_n_e_channel_extract_kernel_8h_source.xhtml | 17 +- documentation/_n_e_col2_im_kernel_8h.xhtml | 9 +- documentation/_n_e_col2_im_kernel_8h_source.xhtml | 13 +- documentation/_n_e_color_convert_8h.xhtml | 9 +- documentation/_n_e_color_convert_8h_source.xhtml | 11 +- documentation/_n_e_color_convert_helper_8inl.xhtml | 11 +- .../_n_e_color_convert_helper_8inl_source.xhtml | 31 +- documentation/_n_e_color_convert_kernel_8h.xhtml | 9 +- .../_n_e_color_convert_kernel_8h_source.xhtml | 11 +- documentation/_n_e_convolution_8h.xhtml | 13 +- documentation/_n_e_convolution_8h_source.xhtml | 19 +- documentation/_n_e_convolution_kernel_8h.xhtml | 9 +- .../_n_e_convolution_kernel_8h_source.xhtml | 13 +- documentation/_n_e_convolution_layer_8h.xhtml | 18 +- .../_n_e_convolution_layer_8h_source.xhtml | 37 +- .../_n_e_cumulative_distribution_kernel_8h.xhtml | 9 +- ..._cumulative_distribution_kernel_8h_source.xhtml | 11 +- documentation/_n_e_depth_concatenate_8h.xhtml | 152 + .../_n_e_depth_concatenate_8h_source.xhtml | 140 + .../_n_e_depth_concatenate_kernel_8h.xhtml | 150 + .../_n_e_depth_concatenate_kernel_8h_source.xhtml | 146 + documentation/_n_e_depth_convert_8h.xhtml | 11 +- documentation/_n_e_depth_convert_8h_source.xhtml | 17 +- documentation/_n_e_depth_convert_kernel_8h.xhtml | 11 +- .../_n_e_depth_convert_kernel_8h_source.xhtml | 17 +- documentation/_n_e_derivative_8h.xhtml | 11 +- documentation/_n_e_derivative_8h_source.xhtml | 15 +- documentation/_n_e_derivative_kernel_8h.xhtml | 9 +- .../_n_e_derivative_kernel_8h_source.xhtml | 13 +- documentation/_n_e_dilate_8h.xhtml | 11 +- documentation/_n_e_dilate_8h_source.xhtml | 15 +- 
documentation/_n_e_dilate_kernel_8h.xhtml | 9 +- documentation/_n_e_dilate_kernel_8h_source.xhtml | 13 +- .../_n_e_direct_convolution_layer_8h.xhtml | 155 + .../_n_e_direct_convolution_layer_8h_source.xhtml | 150 + ...nvolution_layer_bias_accumulate_kernel_8h.xhtml | 150 + ...on_layer_bias_accumulate_kernel_8h_source.xhtml | 144 + .../_n_e_direct_convolution_layer_kernel_8h.xhtml | 150 + ...direct_convolution_layer_kernel_8h_source.xhtml | 147 + documentation/_n_e_equalize_histogram_8h.xhtml | 9 +- .../_n_e_equalize_histogram_8h_source.xhtml | 11 +- documentation/_n_e_erode_8h.xhtml | 11 +- documentation/_n_e_erode_8h_source.xhtml | 15 +- documentation/_n_e_erode_kernel_8h.xhtml | 9 +- documentation/_n_e_erode_kernel_8h_source.xhtml | 13 +- documentation/_n_e_fast_corners_8h.xhtml | 13 +- documentation/_n_e_fast_corners_8h_source.xhtml | 20 +- documentation/_n_e_fast_corners_kernel_8h.xhtml | 11 +- .../_n_e_fast_corners_kernel_8h_source.xhtml | 18 +- documentation/_n_e_fill_array_kernel_8h.xhtml | 11 +- .../_n_e_fill_array_kernel_8h_source.xhtml | 16 +- documentation/_n_e_fill_border_8h.xhtml | 11 +- documentation/_n_e_fill_border_8h_source.xhtml | 15 +- documentation/_n_e_fill_border_kernel_8h.xhtml | 11 +- .../_n_e_fill_border_kernel_8h_source.xhtml | 17 +- .../_n_e_fill_inner_border_kernel_8h.xhtml | 11 +- .../_n_e_fill_inner_border_kernel_8h_source.xhtml | 17 +- documentation/_n_e_fixed_point_8h.js | 89 + documentation/_n_e_fixed_point_8h.xhtml | 413 + documentation/_n_e_fixed_point_8h_source.xhtml | 222 + documentation/_n_e_fixed_point_8inl.js | 80 + documentation/_n_e_fixed_point_8inl.xhtml | 377 + documentation/_n_e_fixed_point_8inl_source.xhtml | 219 + documentation/_n_e_fully_connected_layer_8h.xhtml | 14 +- .../_n_e_fully_connected_layer_8h_source.xhtml | 28 +- documentation/_n_e_functions_8h.xhtml | 13 +- documentation/_n_e_functions_8h_source.xhtml | 15 +- documentation/_n_e_g_e_m_m_8h.xhtml | 11 +- documentation/_n_e_g_e_m_m_8h_source.xhtml | 19 +- 
documentation/_n_e_g_e_m_m_interleave4x4_8h.xhtml | 9 +- .../_n_e_g_e_m_m_interleave4x4_8h_source.xhtml | 11 +- .../_n_e_g_e_m_m_interleave4x4_kernel_8h.xhtml | 9 +- ..._e_g_e_m_m_interleave4x4_kernel_8h_source.xhtml | 11 +- documentation/_n_e_g_e_m_m_lowp_8h.xhtml | 11 +- documentation/_n_e_g_e_m_m_lowp_8h_source.xhtml | 19 +- ..._e_g_e_m_m_lowp_matrix_multiply_kernel_8h.xhtml | 9 +- ...m_m_lowp_matrix_multiply_kernel_8h_source.xhtml | 11 +- ..._e_m_m_matrix_accumulate_biases_kernel_8h.xhtml | 9 +- ...matrix_accumulate_biases_kernel_8h_source.xhtml | 11 +- .../_n_e_g_e_m_m_matrix_addition_kernel_8h.xhtml | 9 +- ..._g_e_m_m_matrix_addition_kernel_8h_source.xhtml | 15 +- .../_n_e_g_e_m_m_matrix_multiply_kernel_8h.xhtml | 9 +- ..._g_e_m_m_matrix_multiply_kernel_8h_source.xhtml | 11 +- documentation/_n_e_g_e_m_m_transpose1x_w_8h.xhtml | 11 +- .../_n_e_g_e_m_m_transpose1x_w_8h_source.xhtml | 11 +- .../_n_e_g_e_m_m_transpose1x_w_kernel_8h.xhtml | 11 +- ..._e_g_e_m_m_transpose1x_w_kernel_8h_source.xhtml | 15 +- documentation/_n_e_gaussian3x3_8h.xhtml | 11 +- documentation/_n_e_gaussian3x3_8h_source.xhtml | 15 +- documentation/_n_e_gaussian3x3_kernel_8h.xhtml | 9 +- .../_n_e_gaussian3x3_kernel_8h_source.xhtml | 13 +- documentation/_n_e_gaussian5x5_8h.xhtml | 13 +- documentation/_n_e_gaussian5x5_8h_source.xhtml | 19 +- documentation/_n_e_gaussian5x5_kernel_8h.xhtml | 9 +- .../_n_e_gaussian5x5_kernel_8h_source.xhtml | 13 +- documentation/_n_e_gaussian_pyramid_8h.xhtml | 13 +- .../_n_e_gaussian_pyramid_8h_source.xhtml | 17 +- .../_n_e_gaussian_pyramid_kernel_8h.xhtml | 9 +- .../_n_e_gaussian_pyramid_kernel_8h_source.xhtml | 13 +- documentation/_n_e_h_o_g_descriptor_8h.xhtml | 11 +- .../_n_e_h_o_g_descriptor_8h_source.xhtml | 17 +- .../_n_e_h_o_g_descriptor_kernel_8h.xhtml | 9 +- .../_n_e_h_o_g_descriptor_kernel_8h_source.xhtml | 11 +- documentation/_n_e_h_o_g_detector_8h.xhtml | 9 +- documentation/_n_e_h_o_g_detector_8h_source.xhtml | 14 +- 
documentation/_n_e_h_o_g_detector_kernel_8h.xhtml | 9 +- .../_n_e_h_o_g_detector_kernel_8h_source.xhtml | 14 +- documentation/_n_e_h_o_g_gradient_8h.xhtml | 13 +- documentation/_n_e_h_o_g_gradient_8h_source.xhtml | 21 +- documentation/_n_e_h_o_g_multi_detection_8h.xhtml | 15 +- .../_n_e_h_o_g_multi_detection_8h_source.xhtml | 20 +- ...g_non_maxima_suppression_kernel_8h_source.xhtml | 146 - documentation/_n_e_harris_corners_8h.xhtml | 13 +- documentation/_n_e_harris_corners_8h_source.xhtml | 20 +- documentation/_n_e_harris_corners_kernel_8h.xhtml | 9 +- .../_n_e_harris_corners_kernel_8h_source.xhtml | 13 +- documentation/_n_e_histogram_8h.xhtml | 9 +- documentation/_n_e_histogram_8h_source.xhtml | 11 +- documentation/_n_e_histogram_kernel_8h.xhtml | 9 +- .../_n_e_histogram_kernel_8h_source.xhtml | 11 +- documentation/_n_e_im2_col_kernel_8h.xhtml | 9 +- documentation/_n_e_im2_col_kernel_8h_source.xhtml | 15 +- documentation/_n_e_integral_image_8h.xhtml | 9 +- documentation/_n_e_integral_image_8h_source.xhtml | 11 +- documentation/_n_e_integral_image_kernel_8h.xhtml | 9 +- .../_n_e_integral_image_kernel_8h_source.xhtml | 13 +- documentation/_n_e_kernels_8h.xhtml | 17 +- documentation/_n_e_kernels_8h_source.xhtml | 19 +- documentation/_n_e_l_k_tracker_kernel_8h.xhtml | 11 +- .../_n_e_l_k_tracker_kernel_8h_source.xhtml | 21 +- documentation/_n_e_laplacian_pyramid_8h.xhtml | 11 +- .../_n_e_laplacian_pyramid_8h_source.xhtml | 15 +- documentation/_n_e_laplacian_reconstruct_8h.xhtml | 11 +- .../_n_e_laplacian_reconstruct_8h_source.xhtml | 15 +- .../_n_e_locally_connected_layer_8h.xhtml | 156 + .../_n_e_locally_connected_layer_8h_source.xhtml | 152 + ...cally_connected_matrix_multiply_kernel_8h.xhtml | 150 + ...nnected_matrix_multiply_kernel_8h_source.xhtml} | 30 +- documentation/_n_e_magnitude_8h.xhtml | 9 +- documentation/_n_e_magnitude_8h_source.xhtml | 11 +- documentation/_n_e_magnitude_phase_kernel_8h.xhtml | 11 +- .../_n_e_magnitude_phase_kernel_8h_source.xhtml | 17 +- 
documentation/_n_e_math_8h.js | 16 +- documentation/_n_e_math_8h.xhtml | 62 +- documentation/_n_e_math_8h_source.xhtml | 30 +- documentation/_n_e_math_8inl.js | 12 + documentation/_n_e_math_8inl.xhtml | 175 + documentation/_n_e_math_8inl_source.xhtml | 145 + documentation/_n_e_mean_std_dev_8h.xhtml | 11 +- documentation/_n_e_mean_std_dev_8h_source.xhtml | 15 +- documentation/_n_e_mean_std_dev_kernel_8h.xhtml | 9 +- .../_n_e_mean_std_dev_kernel_8h_source.xhtml | 11 +- documentation/_n_e_median3x3_8h.xhtml | 11 +- documentation/_n_e_median3x3_8h_source.xhtml | 15 +- documentation/_n_e_median3x3_kernel_8h.xhtml | 9 +- .../_n_e_median3x3_kernel_8h_source.xhtml | 13 +- documentation/_n_e_min_max_location_8h.xhtml | 9 +- .../_n_e_min_max_location_8h_source.xhtml | 15 +- .../_n_e_min_max_location_kernel_8h.xhtml | 9 +- .../_n_e_min_max_location_kernel_8h_source.xhtml | 15 +- documentation/_n_e_non_linear_filter_8h.xhtml | 11 +- .../_n_e_non_linear_filter_8h_source.xhtml | 19 +- .../_n_e_non_linear_filter_kernel_8h.xhtml | 11 +- .../_n_e_non_linear_filter_kernel_8h_source.xhtml | 21 +- .../_n_e_non_maxima_suppression3x3_8h.xhtml | 11 +- .../_n_e_non_maxima_suppression3x3_8h_source.xhtml | 15 +- .../_n_e_non_maxima_suppression3x3_kernel_8h.xhtml | 9 +- ...on_maxima_suppression3x3_kernel_8h_source.xhtml | 13 +- documentation/_n_e_normalization_layer_8h.xhtml | 13 +- .../_n_e_normalization_layer_8h_source.xhtml | 21 +- .../_n_e_normalization_layer_kernel_8h.xhtml | 9 +- ..._n_e_normalization_layer_kernel_8h_source.xhtml | 17 +- documentation/_n_e_o_n_2_alex_net_8cpp.js | 4 + documentation/_n_e_o_n_2_alex_net_8cpp.xhtml | 178 + .../_n_e_o_n_2_alex_net_8cpp_source.xhtml | 169 + documentation/_n_e_o_n_2_depth_convert_8cpp.xhtml | 151 + .../_n_e_o_n_2_depth_convert_8cpp_source.xhtml | 186 + documentation/_n_e_o_n_2_fill_border_8cpp.xhtml | 150 + .../_n_e_o_n_2_fill_border_8cpp_source.xhtml | 169 + documentation/_n_e_o_n_2_g_e_m_m_8h.xhtml | 156 + 
documentation/_n_e_o_n_2_g_e_m_m_8h_source.xhtml | 155 + documentation/_n_e_o_n_2_helper_8h.js | 5 + documentation/_n_e_o_n_2_helper_8h.xhtml | 159 + documentation/_n_e_o_n_2_helper_8h_source.xhtml | 146 + documentation/_n_e_o_n_2_le_net5_8cpp.js | 4 + documentation/_n_e_o_n_2_le_net5_8cpp.xhtml | 176 + documentation/_n_e_o_n_2_le_net5_8cpp_source.xhtml | 163 + documentation/_n_e_o_n_2_threshold_8cpp.xhtml | 152 + .../_n_e_o_n_2_threshold_8cpp_source.xhtml | 178 + documentation/_n_e_optical_flow_8h.xhtml | 13 +- documentation/_n_e_optical_flow_8h_source.xhtml | 19 +- documentation/_n_e_phase_8h.xhtml | 9 +- documentation/_n_e_phase_8h_source.xhtml | 11 +- .../_n_e_pixel_wise_multiplication_8h.xhtml | 11 +- .../_n_e_pixel_wise_multiplication_8h_source.xhtml | 19 +- .../_n_e_pixel_wise_multiplication_kernel_8h.xhtml | 11 +- ...ixel_wise_multiplication_kernel_8h_source.xhtml | 19 +- documentation/_n_e_pooling_layer_8h.xhtml | 11 +- documentation/_n_e_pooling_layer_8h_source.xhtml | 17 +- documentation/_n_e_pooling_layer_kernel_8h.xhtml | 9 +- .../_n_e_pooling_layer_kernel_8h_source.xhtml | 17 +- documentation/_n_e_remap_8h.xhtml | 13 +- documentation/_n_e_remap_8h_source.xhtml | 19 +- documentation/_n_e_remap_kernel_8h.xhtml | 11 +- documentation/_n_e_remap_kernel_8h_source.xhtml | 15 +- documentation/_n_e_scale_8h.xhtml | 13 +- documentation/_n_e_scale_8h_source.xhtml | 21 +- documentation/_n_e_scale_kernel_8h.xhtml | 11 +- documentation/_n_e_scale_kernel_8h_source.xhtml | 17 +- documentation/_n_e_scharr3x3_8h.xhtml | 11 +- documentation/_n_e_scharr3x3_8h_source.xhtml | 15 +- documentation/_n_e_scharr3x3_kernel_8h.xhtml | 9 +- .../_n_e_scharr3x3_kernel_8h_source.xhtml | 13 +- documentation/_n_e_scheduler_8h.js | 2 +- documentation/_n_e_scheduler_8h.xhtml | 15 +- documentation/_n_e_scheduler_8h_source.xhtml | 15 +- documentation/_n_e_sobel3x3_8h.xhtml | 11 +- documentation/_n_e_sobel3x3_8h_source.xhtml | 15 +- documentation/_n_e_sobel3x3_kernel_8h.xhtml | 9 +- 
documentation/_n_e_sobel3x3_kernel_8h_source.xhtml | 13 +- documentation/_n_e_sobel5x5_8h.xhtml | 13 +- documentation/_n_e_sobel5x5_8h_source.xhtml | 19 +- documentation/_n_e_sobel5x5_kernel_8h.xhtml | 9 +- documentation/_n_e_sobel5x5_kernel_8h_source.xhtml | 13 +- documentation/_n_e_sobel7x7_8h.xhtml | 13 +- documentation/_n_e_sobel7x7_8h_source.xhtml | 19 +- documentation/_n_e_sobel7x7_kernel_8h.xhtml | 9 +- documentation/_n_e_sobel7x7_kernel_8h_source.xhtml | 13 +- documentation/_n_e_softmax_layer_8h.xhtml | 11 +- documentation/_n_e_softmax_layer_8h_source.xhtml | 21 +- documentation/_n_e_softmax_layer_kernel_8h.xhtml | 9 +- .../_n_e_softmax_layer_kernel_8h_source.xhtml | 20 +- documentation/_n_e_table_lookup_8h.xhtml | 9 +- documentation/_n_e_table_lookup_8h_source.xhtml | 11 +- documentation/_n_e_table_lookup_kernel_8h.xhtml | 9 +- .../_n_e_table_lookup_kernel_8h_source.xhtml | 11 +- documentation/_n_e_threshold_8h.xhtml | 11 +- documentation/_n_e_threshold_8h_source.xhtml | 18 +- documentation/_n_e_threshold_kernel_8h.xhtml | 11 +- .../_n_e_threshold_kernel_8h_source.xhtml | 18 +- documentation/_n_e_transpose_8h.xhtml | 11 +- documentation/_n_e_transpose_8h_source.xhtml | 15 +- documentation/_n_e_transpose_kernel_8h.xhtml | 9 +- .../_n_e_transpose_kernel_8h_source.xhtml | 11 +- documentation/_n_e_warp_affine_8h.xhtml | 11 +- documentation/_n_e_warp_affine_8h_source.xhtml | 17 +- documentation/_n_e_warp_kernel_8h.xhtml | 11 +- documentation/_n_e_warp_kernel_8h_source.xhtml | 15 +- documentation/_n_e_warp_perspective_8h.xhtml | 11 +- .../_n_e_warp_perspective_8h_source.xhtml | 17 +- ....xhtml => _n_e_weights_reshape_kernel_8h.xhtml} | 21 +- .../_n_e_weights_reshape_kernel_8h_source.xhtml | 144 + documentation/_normalization_layer_8h.xhtml | 157 + documentation/_normalization_layer_8h_source.xhtml | 150 + documentation/_normalization_layer_dataset_8h.js | 7 + .../_normalization_layer_dataset_8h.xhtml | 167 + .../_normalization_layer_dataset_8h_source.xhtml | 152 
+ documentation/_normalization_type_dataset_8h.xhtml | 152 + .../_normalization_type_dataset_8h_source.xhtml | 143 + documentation/_o_m_p_scheduler_8h.xhtml | 150 + documentation/_o_m_p_scheduler_8h_source.xhtml | 140 + documentation/_open_c_l_8h.xhtml | 9 +- documentation/_open_c_l_8h_source.xhtml | 11 +- documentation/_p_m_u_counter_8cpp.js | 4 + documentation/_p_m_u_counter_8cpp.xhtml | 180 + documentation/_p_m_u_counter_8cpp_source.xhtml | 145 + documentation/_p_m_u_counter_8h.xhtml | 157 + documentation/_p_m_u_counter_8h_source.xhtml | 142 + .../_performance_program_options_8cpp.xhtml | 148 + .../_performance_program_options_8cpp_source.xhtml | 135 + .../_performance_program_options_8h.xhtml | 154 + .../_performance_program_options_8h_source.xhtml | 137 + .../_performance_user_configuration_8cpp.xhtml | 148 + ...erformance_user_configuration_8cpp_source.xhtml | 140 + .../_performance_user_configuration_8h.xhtml | 154 + ..._performance_user_configuration_8h_source.xhtml | 140 + documentation/_pixel_value_8h.xhtml | 9 +- documentation/_pixel_value_8h_source.xhtml | 22 +- .../_pixel_wise_multiplication_8cpp.xhtml | 151 + .../_pixel_wise_multiplication_8cpp_source.xhtml | 184 + documentation/_pooling_layer_8h.xhtml | 156 + documentation/_pooling_layer_8h_source.xhtml | 151 + documentation/_pooling_layer_dataset_8h.js | 9 + documentation/_pooling_layer_dataset_8h.xhtml | 170 + .../_pooling_layer_dataset_8h_source.xhtml | 162 + documentation/_profiler_8cpp.xhtml | 149 + documentation/_profiler_8cpp_source.xhtml | 142 + documentation/_profiler_8h.xhtml | 158 + documentation/_profiler_8h_source.xhtml | 142 + documentation/_program_options_8cpp.xhtml | 150 + documentation/_program_options_8cpp_source.xhtml | 141 + documentation/_program_options_8h.xhtml | 154 + documentation/_program_options_8h_source.xhtml | 139 + documentation/_pyramid_8h.xhtml | 13 +- documentation/_pyramid_8h_source.xhtml | 19 +- documentation/_pyramid_info_8h.xhtml | 11 +- 
documentation/_pyramid_info_8h_source.xhtml | 17 +- documentation/_raw_tensor_8cpp.js | 4 + documentation/_raw_tensor_8cpp.xhtml | 158 + documentation/_raw_tensor_8cpp_source.xhtml | 172 + documentation/_raw_tensor_8h.xhtml | 156 + documentation/_raw_tensor_8h_source.xhtml | 159 + documentation/_reciprocal___q_s8_8cpp.xhtml | 151 + documentation/_reciprocal___q_s8_8cpp_source.xhtml | 176 + documentation/_reference_8cpp.xhtml | 152 + documentation/_reference_8cpp_source.xhtml | 193 + documentation/_reference_8h.xhtml | 155 + documentation/_reference_8h_source.xhtml | 172 + documentation/_reference_c_p_p_8cpp.xhtml | 160 + documentation/_reference_c_p_p_8cpp_source.xhtml | 216 + documentation/_reference_c_p_p_8h.xhtml | 156 + documentation/_reference_c_p_p_8h_source.xhtml | 171 + documentation/_rounding_policy_dataset_8h.xhtml | 153 + .../_rounding_policy_dataset_8h_source.xhtml | 143 + documentation/_scheduler_8h.xhtml | 151 + documentation/_scheduler_8h_source.xhtml | 143 + documentation/_shape_datasets_8h.xhtml | 162 + documentation/_shape_datasets_8h_source.xhtml | 147 + documentation/_single_thread_scheduler_8h.xhtml | 150 + .../_single_thread_scheduler_8h_source.xhtml | 140 + documentation/_size2_d_8h.xhtml | 9 +- documentation/_size2_d_8h_source.xhtml | 11 +- documentation/_softmax_layer_8cpp.xhtml | 151 + documentation/_softmax_layer_8cpp_source.xhtml | 180 + documentation/_steps_8h.xhtml | 11 +- documentation/_steps_8h_source.xhtml | 17 +- documentation/_strides_8h.xhtml | 11 +- documentation/_strides_8h_source.xhtml | 15 +- documentation/_sub_tensor_8h.xhtml | 152 + documentation/_sub_tensor_8h_source.xhtml | 147 + documentation/_sub_tensor_info_8h.xhtml | 156 + documentation/_sub_tensor_info_8h_source.xhtml | 197 + documentation/_tensor_8h.js | 5 - documentation/_tensor_8h_source.xhtml | 144 - documentation/_tensor_allocator_8h.xhtml | 9 +- documentation/_tensor_allocator_8h_source.xhtml | 13 +- documentation/_tensor_cache_8h.xhtml | 155 + 
documentation/_tensor_cache_8h_source.xhtml | 138 + documentation/_tensor_factory_8h.js | 6 + documentation/_tensor_factory_8h.xhtml | 165 + documentation/_tensor_factory_8h_source.xhtml | 154 + documentation/_tensor_info_8cpp.xhtml | 139 + documentation/_tensor_info_8cpp_source.xhtml | 150 + documentation/_tensor_info_8h.xhtml | 14 +- documentation/_tensor_info_8h_source.xhtml | 81 +- documentation/_tensor_library_8cpp.xhtml | 159 + documentation/_tensor_library_8cpp_source.xhtml | 164 + documentation/_tensor_library_8h.xhtml | 167 + documentation/_tensor_library_8h_source.xhtml | 186 + documentation/_tensor_operations_8h.js | 38 + documentation/_tensor_operations_8h.xhtml | 265 + documentation/_tensor_operations_8h_source.xhtml | 262 + documentation/_tensor_shape_8cpp.xhtml | 138 + documentation/_tensor_shape_8cpp_source.xhtml | 145 + documentation/_tensor_shape_8h.xhtml | 9 +- documentation/_tensor_shape_8h_source.xhtml | 24 +- documentation/_tensor_visitors_8h.js | 20 + documentation/_tensor_visitors_8h.xhtml | 195 + documentation/_tensor_visitors_8h_source.xhtml | 213 + documentation/_threshold_dataset_8h.xhtml | 160 + documentation/_threshold_dataset_8h_source.xhtml | 152 + documentation/_type_printer_8h.js | 16 + documentation/_type_printer_8h.xhtml | 190 + documentation/_type_printer_8h_source.xhtml | 215 + documentation/_type_reader_8h.js | 4 + documentation/_type_reader_8h.xhtml | 153 + documentation/_type_reader_8h_source.xhtml | 139 + documentation/_types_8h.js | 136 - documentation/_types_8h_source.xhtml | 297 - documentation/_user_configuration_8cpp.xhtml | 147 + .../_user_configuration_8cpp_source.xhtml | 141 + documentation/_user_configuration_8h.xhtml | 155 + documentation/_user_configuration_8h_source.xhtml | 142 + documentation/_utils_8cpp_source.xhtml | 153 - documentation/_v_x_2_depth_convert_8cpp.xhtml | 149 + .../_v_x_2_depth_convert_8cpp_source.xhtml | 164 + documentation/_v_x_helpers_8h.js | 4 + documentation/_v_x_helpers_8h.xhtml | 156 + 
documentation/_v_x_helpers_8h_source.xhtml | 141 + documentation/_validate_8h.js | 18 +- documentation/_validate_8h.xhtml | 255 +- documentation/_validate_8h_source.xhtml | 61 +- documentation/_validation_8cpp.js | 10 + documentation/_validation_8cpp.xhtml | 186 + documentation/_validation_8cpp_source.xhtml | 195 + documentation/_validation_8h.js | 11 + documentation/_validation_8h.xhtml | 177 + documentation/_validation_8h_source.xhtml | 144 + .../_validation_program_options_8cpp.xhtml | 149 + .../_validation_program_options_8cpp_source.xhtml | 135 + documentation/_validation_program_options_8h.xhtml | 154 + .../_validation_program_options_8h_source.xhtml | 137 + documentation/_validation_user_configuration_8h.js | 4 + .../_validation_user_configuration_8h.xhtml | 153 + .../_validation_user_configuration_8h_source.xhtml | 136 + documentation/_wall_clock_timer_8cpp.xhtml | 148 + documentation/_wall_clock_timer_8cpp_source.xhtml | 138 + documentation/_wall_clock_timer_8h.xhtml | 155 + documentation/_wall_clock_timer_8h_source.xhtml | 140 + documentation/_window_8h.xhtml | 11 +- documentation/_window_8h_source.xhtml | 51 +- documentation/_window_8inl.xhtml | 9 +- documentation/_window_8inl_source.xhtml | 35 +- documentation/absdiff_8cl.xhtml | 9 +- documentation/absdiff_8cl_source.xhtml | 9 +- documentation/accumulate_8cl.xhtml | 15 +- documentation/accumulate_8cl_source.xhtml | 9 +- documentation/activation__layer_8cl.js | 2 +- documentation/activation__layer_8cl.xhtml | 65 +- documentation/activation__layer_8cl_source.xhtml | 25 +- documentation/annotated.xhtml | 1031 +- documentation/annotated_dup.js | 3 +- documentation/architecture.xhtml | 262 + documentation/arithmetic__op_8cl.xhtml | 9 +- documentation/arithmetic__op_8cl_source.xhtml | 9 +- .../arm__compute_2core_2_fixed_point_8h.js | 26 + .../arm__compute_2core_2_fixed_point_8h.xhtml | 221 + ...rm__compute_2core_2_fixed_point_8h_source.xhtml | 157 + documentation/arm__compute_2core_2_helpers_8h.js | 39 + 
.../arm__compute_2core_2_helpers_8h.xhtml | 277 + .../arm__compute_2core_2_helpers_8h_source.xhtml | 195 + documentation/arm__compute_2core_2_types_8h.js | 140 + ...h.xhtml => arm__compute_2core_2_types_8h.xhtml} | 47 +- .../arm__compute_2core_2_types_8h_source.xhtml | 309 + documentation/arm__compute_2core_2_utils_8h.js | 4 +- documentation/arm__compute_2core_2_utils_8h.xhtml | 25 +- .../arm__compute_2core_2_utils_8h_source.xhtml | 78 +- documentation/arm__compute_2runtime_2_tensor_8h.js | 5 + ...tml => arm__compute_2runtime_2_tensor_8h.xhtml} | 15 +- .../arm__compute_2runtime_2_tensor_8h_source.xhtml | 145 + documentation/arm__compute_2runtime_2_utils_8h.js | 4 + .../arm__compute_2runtime_2_utils_8h.xhtml | 151 + .../arm__compute_2runtime_2_utils_8h_source.xhtml | 136 + documentation/batchnormalization__layer_8cl.js | 4 + documentation/batchnormalization__layer_8cl.xhtml | 408 + .../batchnormalization__layer_8cl_source.xhtml | 143 + .../benchmark_2_c_l_2_activation_layer_8cpp.js | 5 + .../benchmark_2_c_l_2_activation_layer_8cpp.xhtml | 196 + ...mark_2_c_l_2_activation_layer_8cpp_source.xhtml | 154 + .../benchmark_2_c_l_2_bitwise_and_8cpp.js | 4 + .../benchmark_2_c_l_2_bitwise_and_8cpp.xhtml | 174 + ...benchmark_2_c_l_2_bitwise_and_8cpp_source.xhtml | 161 + .../benchmark_2_c_l_2_convolution_layer_8cpp.js | 5 + .../benchmark_2_c_l_2_convolution_layer_8cpp.xhtml | 196 + ...ark_2_c_l_2_convolution_layer_8cpp_source.xhtml | 154 + ...benchmark_2_c_l_2_fully_connected_layer_8cpp.js | 5 + ...chmark_2_c_l_2_fully_connected_layer_8cpp.xhtml | 198 + ...2_c_l_2_fully_connected_layer_8cpp_source.xhtml | 154 + documentation/benchmark_2_c_l_2_g_e_m_m_8cpp.js | 5 + documentation/benchmark_2_c_l_2_g_e_m_m_8cpp.xhtml | 196 + .../benchmark_2_c_l_2_g_e_m_m_8cpp_source.xhtml | 154 + .../benchmark_2_c_l_2_normalization_layer_8cpp.js | 5 + ...enchmark_2_c_l_2_normalization_layer_8cpp.xhtml | 196 + ...k_2_c_l_2_normalization_layer_8cpp_source.xhtml | 154 + 
.../benchmark_2_c_l_2_pooling_layer_8cpp.js | 5 + .../benchmark_2_c_l_2_pooling_layer_8cpp.xhtml | 196 + ...nchmark_2_c_l_2_pooling_layer_8cpp_source.xhtml | 154 + documentation/benchmark_2_datasets_8h.js | 6 + documentation/benchmark_2_datasets_8h.xhtml | 172 + documentation/benchmark_2_datasets_8h_source.xhtml | 147 + .../benchmark_2_n_e_o_n_2_activation_layer_8cpp.js | 5 + ...nchmark_2_n_e_o_n_2_activation_layer_8cpp.xhtml | 195 + ..._2_n_e_o_n_2_activation_layer_8cpp_source.xhtml | 151 + .../benchmark_2_n_e_o_n_2_bitwise_and_8cpp.js | 4 + .../benchmark_2_n_e_o_n_2_bitwise_and_8cpp.xhtml | 173 + ...hmark_2_n_e_o_n_2_bitwise_and_8cpp_source.xhtml | 158 + ...benchmark_2_n_e_o_n_2_convolution_layer_8cpp.js | 5 + ...chmark_2_n_e_o_n_2_convolution_layer_8cpp.xhtml | 195 + ...2_n_e_o_n_2_convolution_layer_8cpp_source.xhtml | 151 + ...rk_2_n_e_o_n_2_convolution_layer_direct_8cpp.js | 5 + ...2_n_e_o_n_2_convolution_layer_direct_8cpp.xhtml | 196 + ..._n_2_convolution_layer_direct_8cpp_source.xhtml | 152 + ...hmark_2_n_e_o_n_2_fully_connected_layer_8cpp.js | 5 + ...rk_2_n_e_o_n_2_fully_connected_layer_8cpp.xhtml | 195 + ...e_o_n_2_fully_connected_layer_8cpp_source.xhtml | 151 + .../benchmark_2_n_e_o_n_2_g_e_m_m_8cpp.js | 5 + .../benchmark_2_n_e_o_n_2_g_e_m_m_8cpp.xhtml | 195 + ...benchmark_2_n_e_o_n_2_g_e_m_m_8cpp_source.xhtml | 151 + ...nchmark_2_n_e_o_n_2_normalization_layer_8cpp.js | 5 + ...mark_2_n_e_o_n_2_normalization_layer_8cpp.xhtml | 195 + ...n_e_o_n_2_normalization_layer_8cpp_source.xhtml | 151 + .../benchmark_2_n_e_o_n_2_pooling_layer_8cpp.js | 5 + .../benchmark_2_n_e_o_n_2_pooling_layer_8cpp.xhtml | 195 + ...ark_2_n_e_o_n_2_pooling_layer_8cpp_source.xhtml | 151 + documentation/benchmark_2main_8cpp.js | 6 + documentation/benchmark_2main_8cpp.xhtml | 214 + documentation/benchmark_2main_8cpp_source.xhtml | 158 + ...mark_2system__tests_2common_2_alex_net_8h.xhtml | 157 + ...ystem__tests_2common_2_alex_net_8h_source.xhtml | 147 + 
...hmark_2system__tests_2common_2_le_net5_8h.xhtml | 157 + ...system__tests_2common_2_le_net5_8h_source.xhtml | 145 + documentation/bitwise__op_8cl.xhtml | 17 +- documentation/bitwise__op_8cl_source.xhtml | 9 +- documentation/boost__wrapper_8h.xhtml | 139 + documentation/boost__wrapper_8h_source.xhtml | 132 + documentation/canny_8cl.xhtml | 20 +- documentation/canny_8cl_source.xhtml | 14 +- documentation/channel__combine_8cl.xhtml | 9 +- documentation/channel__combine_8cl_source.xhtml | 9 +- documentation/channel__extract_8cl.xhtml | 9 +- documentation/channel__extract_8cl_source.xhtml | 9 +- documentation/cl__convolution_8cpp.xhtml | 37 +- documentation/cl__convolution_8cpp_source.xhtml | 38 +- documentation/cl__events_8cpp.xhtml | 39 +- documentation/cl__events_8cpp_source.xhtml | 42 +- ...sarm__compute_1_1_access_window_auto_padding.js | 2 +- ...m__compute_1_1_access_window_auto_padding.xhtml | 19 +- ...assarm__compute_1_1_access_window_horizontal.js | 2 +- ...arm__compute_1_1_access_window_horizontal.xhtml | 37 +- ...lassarm__compute_1_1_access_window_rectangle.js | 4 +- ...sarm__compute_1_1_access_window_rectangle.xhtml | 35 +- .../classarm__compute_1_1_access_window_static.js | 4 +- ...lassarm__compute_1_1_access_window_static.xhtml | 29 +- ...mpute_1_1_access_window_static__coll__graph.map | 2 +- ...mpute_1_1_access_window_static__coll__graph.md5 | 2 +- ...mpute_1_1_access_window_static__coll__graph.svg | 18 +- ...sarm__compute_1_1_access_window_transpose.xhtml | 21 +- ...classarm__compute_1_1_access_window_vertical.js | 2 +- ...ssarm__compute_1_1_access_window_vertical.xhtml | 37 +- ...assarm__compute_1_1_activation_layer_info.xhtml | 43 +- documentation/classarm__compute_1_1_array.xhtml | 12 +- ...sarm__compute_1_1_c_l_absolute_difference.xhtml | 9 +- ...ompute_1_1_c_l_absolute_difference_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_accumulate.xhtml | 9 +- ...assarm__compute_1_1_c_l_accumulate_kernel.xhtml | 18 +- 
...ssarm__compute_1_1_c_l_accumulate_squared.xhtml | 9 +- ...compute_1_1_c_l_accumulate_squared_kernel.xhtml | 18 +- ...sarm__compute_1_1_c_l_accumulate_weighted.xhtml | 9 +- ...ompute_1_1_c_l_accumulate_weighted_kernel.xhtml | 18 +- ...lassarm__compute_1_1_c_l_activation_layer.xhtml | 9 +- ...__compute_1_1_c_l_activation_layer_kernel.xhtml | 28 +- ..._1_c_l_activation_layer_kernel__coll__graph.map | 9 +- ..._1_c_l_activation_layer_kernel__coll__graph.md5 | 2 +- ..._1_c_l_activation_layer_kernel__coll__graph.svg | 43 +- ...sarm__compute_1_1_c_l_arithmetic_addition.xhtml | 9 +- ...ompute_1_1_c_l_arithmetic_addition_kernel.xhtml | 18 +- ...m__compute_1_1_c_l_arithmetic_subtraction.xhtml | 9 +- ...ute_1_1_c_l_arithmetic_subtraction_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_array.xhtml | 11 +- ...m__compute_1_1_c_l_batch_normalization_layer.js | 6 + ...compute_1_1_c_l_batch_normalization_layer.xhtml | 300 + ..._c_l_batch_normalization_layer__coll__graph.map | 3 + ..._c_l_batch_normalization_layer__coll__graph.md5 | 1 + ..._c_l_batch_normalization_layer__coll__graph.svg | 31 + ...ute_1_1_c_l_batch_normalization_layer_kernel.js | 11 + ..._1_1_c_l_batch_normalization_layer_kernel.xhtml | 500 + ...tch_normalization_layer_kernel__coll__graph.map | 4 + ...tch_normalization_layer_kernel__coll__graph.md5 | 1 + ...tch_normalization_layer_kernel__coll__graph.svg | 45 + .../classarm__compute_1_1_c_l_bitwise_and.xhtml | 9 +- ...ssarm__compute_1_1_c_l_bitwise_and_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_bitwise_not.xhtml | 9 +- ...ssarm__compute_1_1_c_l_bitwise_not_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_bitwise_or.xhtml | 9 +- ...assarm__compute_1_1_c_l_bitwise_or_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_bitwise_xor.xhtml | 9 +- ...ssarm__compute_1_1_c_l_bitwise_xor_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_box3x3.xhtml | 9 +- .../classarm__compute_1_1_c_l_box3x3_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_canny_edge.xhtml | 
11 +- ...classarm__compute_1_1_c_l_channel_combine.xhtml | 9 +- ...m__compute_1_1_c_l_channel_combine_kernel.xhtml | 18 +- ...classarm__compute_1_1_c_l_channel_extract.xhtml | 9 +- ...m__compute_1_1_c_l_channel_extract_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_col2_im_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_color_convert.xhtml | 9 +- ...arm__compute_1_1_c_l_color_convert_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_convolution3x3.xhtml | 9 +- ...ssarm__compute_1_1_c_l_convolution_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_convolution_layer.js | 2 +- ...assarm__compute_1_1_c_l_convolution_layer.xhtml | 30 +- ...te_1_1_c_l_convolution_layer_reshape_weights.js | 6 + ...1_1_c_l_convolution_layer_reshape_weights.xhtml | 280 + ...volution_layer_reshape_weights__coll__graph.map | 3 + ...volution_layer_reshape_weights__coll__graph.md5 | 1 + ...volution_layer_reshape_weights__coll__graph.svg | 32 + ...c_l_convolution_layer_weights_reshape_kernel.js | 6 - ..._convolution_layer_weights_reshape_kernel.xhtml | 239 +- ...n_layer_weights_reshape_kernel__coll__graph.map | 5 +- ...n_layer_weights_reshape_kernel__coll__graph.md5 | 2 +- ...n_layer_weights_reshape_kernel__coll__graph.svg | 33 +- ...rm__compute_1_1_c_l_convolution_rectangle.xhtml | 9 +- ...pute_1_1_c_l_convolution_rectangle_kernel.xhtml | 18 +- ...ssarm__compute_1_1_c_l_convolution_square.xhtml | 11 +- ...arm__compute_1_1_c_l_copy_to_array_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_depth_concatenate.js | 6 + ...assarm__compute_1_1_c_l_depth_concatenate.xhtml | 267 + ...pute_1_1_c_l_depth_concatenate__coll__graph.map | 3 + ...pute_1_1_c_l_depth_concatenate__coll__graph.md5 | 1 + ...pute_1_1_c_l_depth_concatenate__coll__graph.svg | 31 + ...rm__compute_1_1_c_l_depth_concatenate_kernel.js | 12 + ..._compute_1_1_c_l_depth_concatenate_kernel.xhtml | 504 + ...1_c_l_depth_concatenate_kernel__coll__graph.map | 4 + ...1_c_l_depth_concatenate_kernel__coll__graph.md5 | 1 + 
...1_c_l_depth_concatenate_kernel__coll__graph.svg | 44 + .../classarm__compute_1_1_c_l_depth_convert.xhtml | 9 +- ...arm__compute_1_1_c_l_depth_convert_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_derivative.xhtml | 9 +- ...assarm__compute_1_1_c_l_derivative_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_dilate.xhtml | 9 +- .../classarm__compute_1_1_c_l_dilate_kernel.xhtml | 18 +- ...classarm__compute_1_1_c_l_distribution1_d.xhtml | 9 +- ...e_1_1_c_l_edge_non_max_suppression_kernel.xhtml | 18 +- ...assarm__compute_1_1_c_l_edge_trace_kernel.xhtml | 18 +- ...ssarm__compute_1_1_c_l_equalize_histogram.xhtml | 11 +- .../classarm__compute_1_1_c_l_erode.xhtml | 9 +- .../classarm__compute_1_1_c_l_erode_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_fast_corners.xhtml | 11 +- ...sarm__compute_1_1_c_l_fast_corners_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_fill_border.xhtml | 9 +- ...ssarm__compute_1_1_c_l_fill_border_kernel.xhtml | 18 +- ...ssarm__compute_1_1_c_l_fully_connected_layer.js | 2 +- ...rm__compute_1_1_c_l_fully_connected_layer.xhtml | 39 +- ..._1_c_l_fully_connected_layer_reshape_weights.js | 6 + ...c_l_fully_connected_layer_reshape_weights.xhtml | 282 + ...onnected_layer_reshape_weights__coll__graph.map | 3 + ...onnected_layer_reshape_weights__coll__graph.md5 | 1 + ...onnected_layer_reshape_weights__coll__graph.svg | 32 + .../classarm__compute_1_1_c_l_g_e_m_m.xhtml | 11 +- ...rm__compute_1_1_c_l_g_e_m_m_interleave4x4.xhtml | 9 +- ...pute_1_1_c_l_g_e_m_m_interleave4x4_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_g_e_m_m_lowp.xhtml | 11 +- ...1_c_l_g_e_m_m_lowp_matrix_multiply_kernel.xhtml | 18 +- ...l_g_e_m_m_matrix_accumulate_biases_kernel.xhtml | 18 +- ...te_1_1_c_l_g_e_m_m_matrix_addition_kernel.xhtml | 18 +- ...te_1_1_c_l_g_e_m_m_matrix_multiply_kernel.xhtml | 18 +- ...compute_1_1_c_l_g_e_m_m_transpose1x_w_kernel.js | 3 +- ...pute_1_1_c_l_g_e_m_m_transpose1x_w_kernel.xhtml | 71 +- .../classarm__compute_1_1_c_l_gaussian3x3.xhtml | 
9 +- ...ssarm__compute_1_1_c_l_gaussian3x3_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_gaussian5x5.xhtml | 11 +- ...m__compute_1_1_c_l_gaussian5x5_hor_kernel.xhtml | 18 +- ...__compute_1_1_c_l_gaussian5x5_vert_kernel.xhtml | 18 +- ...lassarm__compute_1_1_c_l_gaussian_pyramid.xhtml | 9 +- ...rm__compute_1_1_c_l_gaussian_pyramid_half.xhtml | 11 +- ...mpute_1_1_c_l_gaussian_pyramid_hor_kernel.xhtml | 18 +- ...arm__compute_1_1_c_l_gaussian_pyramid_orb.xhtml | 11 +- ...pute_1_1_c_l_gaussian_pyramid_vert_kernel.xhtml | 18 +- ...classarm__compute_1_1_c_l_gradient_kernel.xhtml | 18 +- documentation/classarm__compute_1_1_c_l_h_o_g.js | 10 + .../classarm__compute_1_1_c_l_h_o_g.xhtml | 380 + ...lassarm__compute_1_1_c_l_h_o_g__coll__graph.map | 4 + ...lassarm__compute_1_1_c_l_h_o_g__coll__graph.md5 | 1 + ...lassarm__compute_1_1_c_l_h_o_g__coll__graph.svg | 44 + ...ute_1_1_c_l_h_o_g_block_normalization_kernel.js | 11 + ..._1_1_c_l_h_o_g_block_normalization_kernel.xhtml | 472 + ...o_g_block_normalization_kernel__coll__graph.map | 4 + ...o_g_block_normalization_kernel__coll__graph.md5 | 1 + ...o_g_block_normalization_kernel__coll__graph.svg | 44 + .../classarm__compute_1_1_c_l_h_o_g_descriptor.js | 6 + ...lassarm__compute_1_1_c_l_h_o_g_descriptor.xhtml | 289 + ...mpute_1_1_c_l_h_o_g_descriptor__coll__graph.map | 3 + ...mpute_1_1_c_l_h_o_g_descriptor__coll__graph.md5 | 1 + ...mpute_1_1_c_l_h_o_g_descriptor__coll__graph.svg | 31 + .../classarm__compute_1_1_c_l_h_o_g_detector.js | 11 + .../classarm__compute_1_1_c_l_h_o_g_detector.xhtml | 439 + ...compute_1_1_c_l_h_o_g_detector__coll__graph.map | 3 + ...compute_1_1_c_l_h_o_g_detector__coll__graph.md5 | 1 + ...compute_1_1_c_l_h_o_g_detector__coll__graph.svg | 31 + ...ssarm__compute_1_1_c_l_h_o_g_detector_kernel.js | 11 + ...rm__compute_1_1_c_l_h_o_g_detector_kernel.xhtml | 500 + ..._1_1_c_l_h_o_g_detector_kernel__coll__graph.map | 4 + ..._1_1_c_l_h_o_g_detector_kernel__coll__graph.md5 | 1 + 
..._1_1_c_l_h_o_g_detector_kernel__coll__graph.svg | 44 + .../classarm__compute_1_1_c_l_h_o_g_gradient.js | 6 + .../classarm__compute_1_1_c_l_h_o_g_gradient.xhtml | 295 + ...compute_1_1_c_l_h_o_g_gradient__coll__graph.map | 3 + ...compute_1_1_c_l_h_o_g_gradient__coll__graph.md5 | 1 + ...compute_1_1_c_l_h_o_g_gradient__coll__graph.svg | 31 + ...ssarm__compute_1_1_c_l_h_o_g_multi_detection.js | 8 + ...rm__compute_1_1_c_l_h_o_g_multi_detection.xhtml | 383 + ..._1_1_c_l_h_o_g_multi_detection__coll__graph.map | 3 + ..._1_1_c_l_h_o_g_multi_detection__coll__graph.md5 | 1 + ..._1_1_c_l_h_o_g_multi_detection__coll__graph.svg | 31 + ...ute_1_1_c_l_h_o_g_orientation_binning_kernel.js | 11 + ..._1_1_c_l_h_o_g_orientation_binning_kernel.xhtml | 479 + ...o_g_orientation_binning_kernel__coll__graph.map | 4 + ...o_g_orientation_binning_kernel__coll__graph.md5 | 1 + ...o_g_orientation_binning_kernel__coll__graph.svg | 44 + .../classarm__compute_1_1_c_l_harris_corners.xhtml | 11 +- ...sarm__compute_1_1_c_l_harris_score_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_histogram.xhtml | 11 +- ...__compute_1_1_c_l_histogram_border_kernel.xhtml | 18 +- ...lassarm__compute_1_1_c_l_histogram_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_im2_col_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_integral_image.xhtml | 11 +- ...compute_1_1_c_l_integral_image_hor_kernel.xhtml | 18 +- ...ompute_1_1_c_l_integral_image_vert_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_kernel_library.xhtml | 15 +- ...mpute_1_1_c_l_l_k_tracker_finalize_kernel.xhtml | 18 +- ...__compute_1_1_c_l_l_k_tracker_init_kernel.xhtml | 18 +- ...compute_1_1_c_l_l_k_tracker_stage0_kernel.xhtml | 18 +- ...compute_1_1_c_l_l_k_tracker_stage1_kernel.xhtml | 18 +- ...assarm__compute_1_1_c_l_laplacian_pyramid.xhtml | 11 +- ...rm__compute_1_1_c_l_laplacian_reconstruct.xhtml | 11 +- ...arm__compute_1_1_c_l_locally_connected_layer.js | 6 + ...__compute_1_1_c_l_locally_connected_layer.xhtml | 290 + 
..._1_c_l_locally_connected_layer__coll__graph.map | 3 + ..._1_c_l_locally_connected_layer__coll__graph.md5 | 1 + ..._1_c_l_locally_connected_layer__coll__graph.svg | 31 + ...cally_connected_layer_weights_reshape_kernel.js | 5 + ...ly_connected_layer_weights_reshape_kernel.xhtml | 305 + ...d_layer_weights_reshape_kernel__coll__graph.map | 5 + ...d_layer_weights_reshape_kernel__coll__graph.md5 | 1 + ...d_layer_weights_reshape_kernel__coll__graph.svg | 58 + ...c_l_locally_connected_matrix_multiply_kernel.js | 10 + ..._locally_connected_matrix_multiply_kernel.xhtml | 445 + ...nnected_matrix_multiply_kernel__coll__graph.map | 4 + ...nnected_matrix_multiply_kernel__coll__graph.md5 | 1 + ...nnected_matrix_multiply_kernel__coll__graph.svg | 45 + ...arm__compute_1_1_c_l_logits1_d_max_kernel.xhtml | 18 +- ...rm__compute_1_1_c_l_logits1_d_norm_kernel.xhtml | 18 +- ...te_1_1_c_l_logits1_d_shift_exp_sum_kernel.xhtml | 18 +- documentation/classarm__compute_1_1_c_l_lut.xhtml | 9 +- .../classarm__compute_1_1_c_l_lut_allocator.xhtml | 9 +- .../classarm__compute_1_1_c_l_magnitude.xhtml | 9 +- ...m__compute_1_1_c_l_magnitude_phase_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_mean_std_dev.xhtml | 11 +- ...sarm__compute_1_1_c_l_mean_std_dev_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_median3x3.xhtml | 9 +- ...lassarm__compute_1_1_c_l_median3x3_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_min_max_kernel.xhtml | 18 +- ...lassarm__compute_1_1_c_l_min_max_location.xhtml | 11 +- ...__compute_1_1_c_l_min_max_location_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_multi_h_o_g.js | 7 + .../classarm__compute_1_1_c_l_multi_h_o_g.xhtml | 305 + ...m__compute_1_1_c_l_multi_h_o_g__coll__graph.map | 4 + ...m__compute_1_1_c_l_multi_h_o_g__coll__graph.md5 | 1 + ...m__compute_1_1_c_l_multi_h_o_g__coll__graph.svg | 44 + .../classarm__compute_1_1_c_l_multi_image.xhtml | 9 +- ...assarm__compute_1_1_c_l_non_linear_filter.xhtml | 9 +- ..._compute_1_1_c_l_non_linear_filter_kernel.xhtml 
| 18 +- ...compute_1_1_c_l_non_maxima_suppression3x3.xhtml | 9 +- ..._1_1_c_l_non_maxima_suppression3x3_kernel.xhtml | 18 +- ...sarm__compute_1_1_c_l_normalization_layer.xhtml | 11 +- ...ompute_1_1_c_l_normalization_layer_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_optical_flow.xhtml | 11 +- .../classarm__compute_1_1_c_l_phase.xhtml | 9 +- ...compute_1_1_c_l_pixel_wise_multiplication.xhtml | 9 +- ..._1_1_c_l_pixel_wise_multiplication_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_pooling_layer.xhtml | 9 +- ...arm__compute_1_1_c_l_pooling_layer_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_pyramid.xhtml | 9 +- .../classarm__compute_1_1_c_l_remap.xhtml | 9 +- .../classarm__compute_1_1_c_l_remap_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_scale.xhtml | 9 +- .../classarm__compute_1_1_c_l_scale_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_scharr3x3.xhtml | 9 +- ...lassarm__compute_1_1_c_l_scharr3x3_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_scheduler.js | 6 +- .../classarm__compute_1_1_c_l_scheduler.xhtml | 154 +- ..._1_1_c_l_separable_convolution_hor_kernel.xhtml | 18 +- ...1_1_c_l_separable_convolution_vert_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_sobel3x3.xhtml | 9 +- ...classarm__compute_1_1_c_l_sobel3x3_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_sobel5x5.xhtml | 11 +- ...sarm__compute_1_1_c_l_sobel5x5_hor_kernel.xhtml | 18 +- ...arm__compute_1_1_c_l_sobel5x5_vert_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_sobel7x7.xhtml | 11 +- ...sarm__compute_1_1_c_l_sobel7x7_hor_kernel.xhtml | 18 +- ...arm__compute_1_1_c_l_sobel7x7_vert_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_softmax_layer.xhtml | 11 +- .../classarm__compute_1_1_c_l_sub_tensor.js | 15 + .../classarm__compute_1_1_c_l_sub_tensor.xhtml | 552 + ...rm__compute_1_1_c_l_sub_tensor__coll__graph.map | 4 + ...rm__compute_1_1_c_l_sub_tensor__coll__graph.md5 | 1 + ...rm__compute_1_1_c_l_sub_tensor__coll__graph.svg | 44 + 
.../classarm__compute_1_1_c_l_table_lookup.xhtml | 9 +- ...sarm__compute_1_1_c_l_table_lookup_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_tensor.xhtml | 21 +- ...lassarm__compute_1_1_c_l_tensor_allocator.xhtml | 9 +- .../classarm__compute_1_1_c_l_threshold.xhtml | 9 +- ...lassarm__compute_1_1_c_l_threshold_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_transpose.xhtml | 9 +- ...lassarm__compute_1_1_c_l_transpose_kernel.xhtml | 18 +- .../classarm__compute_1_1_c_l_warp_affine.xhtml | 9 +- ...ssarm__compute_1_1_c_l_warp_affine_kernel.xhtml | 18 +- ...lassarm__compute_1_1_c_l_warp_perspective.xhtml | 9 +- ...__compute_1_1_c_l_warp_perspective_kernel.xhtml | 18 +- ...sarm__compute_1_1_c_l_weights_reshape_kernel.js | 11 + ...m__compute_1_1_c_l_weights_reshape_kernel.xhtml | 477 + ...1_1_c_l_weights_reshape_kernel__coll__graph.map | 4 + ...1_1_c_l_weights_reshape_kernel__coll__graph.md5 | 1 + ...1_1_c_l_weights_reshape_kernel__coll__graph.svg | 44 + ...ompute_1_1_c_p_p_corner_candidates_kernel.xhtml | 9 +- ...tection_window_non_maxima_suppression_kernel.js | 11 + ...ion_window_non_maxima_suppression_kernel.xhtml} | 104 +- ..._non_maxima_suppression_kernel__coll__graph.map | 4 + ..._non_maxima_suppression_kernel__coll__graph.md5 | 1 + ..._non_maxima_suppression_kernel__coll__graph.svg | 45 + .../classarm__compute_1_1_c_p_p_scheduler.js | 6 +- .../classarm__compute_1_1_c_p_p_scheduler.xhtml | 123 +- ...m__compute_1_1_c_p_p_scheduler__coll__graph.map | 3 + ...m__compute_1_1_c_p_p_scheduler__coll__graph.md5 | 1 + ...m__compute_1_1_c_p_p_scheduler__coll__graph.svg | 31 + ..._1_1_c_p_p_sort_euclidean_distance_kernel.xhtml | 9 +- ...sort_euclidean_distance_kernel__coll__graph.map | 2 +- ...sort_euclidean_distance_kernel__coll__graph.md5 | 2 +- ...sort_euclidean_distance_kernel__coll__graph.svg | 4 +- documentation/classarm__compute_1_1_coordinates.js | 1 + .../classarm__compute_1_1_coordinates.xhtml | 63 +- documentation/classarm__compute_1_1_dimensions.js | 3 +- 
.../classarm__compute_1_1_dimensions.xhtml | 180 +- .../classarm__compute_1_1_distribution1_d.xhtml | 9 +- documentation/classarm__compute_1_1_h_o_g.xhtml | 9 +- .../classarm__compute_1_1_h_o_g_info.xhtml | 9 +- .../classarm__compute_1_1_i_access_window.xhtml | 13 +- documentation/classarm__compute_1_1_i_array.xhtml | 15 +- .../classarm__compute_1_1_i_c_l_array.xhtml | 9 +- ...assarm__compute_1_1_i_c_l_distribution1_d.xhtml | 9 +- documentation/classarm__compute_1_1_i_c_l_h_o_g.js | 14 + .../classarm__compute_1_1_i_c_l_h_o_g.xhtml | 492 + ...ssarm__compute_1_1_i_c_l_h_o_g__coll__graph.map | 3 + ...ssarm__compute_1_1_i_c_l_h_o_g__coll__graph.md5 | 1 + ...ssarm__compute_1_1_i_c_l_h_o_g__coll__graph.svg | 31 + .../classarm__compute_1_1_i_c_l_kernel.js | 5 +- .../classarm__compute_1_1_i_c_l_kernel.xhtml | 100 +- .../classarm__compute_1_1_i_c_l_lut.xhtml | 9 +- .../classarm__compute_1_1_i_c_l_multi_h_o_g.js | 7 + .../classarm__compute_1_1_i_c_l_multi_h_o_g.xhtml | 318 + ..._compute_1_1_i_c_l_multi_h_o_g__coll__graph.map | 3 + ..._compute_1_1_i_c_l_multi_h_o_g__coll__graph.md5 | 1 + ..._compute_1_1_i_c_l_multi_h_o_g__coll__graph.svg | 31 + .../classarm__compute_1_1_i_c_l_multi_image.xhtml | 9 +- ...ssarm__compute_1_1_i_c_l_simple2_d_kernel.xhtml | 20 +- ...ssarm__compute_1_1_i_c_l_simple3_d_kernel.xhtml | 18 +- ...assarm__compute_1_1_i_c_l_simple_function.xhtml | 11 +- ...classarm__compute_1_1_i_c_l_simple_kernel.xhtml | 18 +- .../classarm__compute_1_1_i_c_l_tensor.xhtml | 23 +- .../classarm__compute_1_1_i_c_p_p_kernel.xhtml | 11 +- ...assarm__compute_1_1_i_c_p_p_simple_kernel.xhtml | 9 +- .../classarm__compute_1_1_i_distribution.xhtml | 9 +- .../classarm__compute_1_1_i_distribution1_d.xhtml | 9 +- .../classarm__compute_1_1_i_function.xhtml | 13 +- documentation/classarm__compute_1_1_i_h_o_g.xhtml | 13 +- documentation/classarm__compute_1_1_i_kernel.xhtml | 15 +- documentation/classarm__compute_1_1_i_lut.xhtml | 9 +- .../classarm__compute_1_1_i_lut_allocator.xhtml | 9 +- 
.../classarm__compute_1_1_i_multi_h_o_g.xhtml | 15 +- .../classarm__compute_1_1_i_multi_image.xhtml | 9 +- ...rm__compute_1_1_i_n_e_harris_score_kernel.xhtml | 11 +- ...assarm__compute_1_1_i_n_e_simple_function.xhtml | 13 +- .../classarm__compute_1_1_i_n_e_warp_kernel.xhtml | 9 +- .../classarm__compute_1_1_i_pyramid.xhtml | 9 +- documentation/classarm__compute_1_1_i_scheduler.js | 7 + .../classarm__compute_1_1_i_scheduler.xhtml | 300 + documentation/classarm__compute_1_1_i_tensor.js | 4 +- documentation/classarm__compute_1_1_i_tensor.xhtml | 47 +- .../classarm__compute_1_1_i_tensor_allocator.xhtml | 11 +- .../classarm__compute_1_1_i_tensor_info.js | 29 + .../classarm__compute_1_1_i_tensor_info.xhtml | 1075 ++ documentation/classarm__compute_1_1_iterator.js | 2 +- documentation/classarm__compute_1_1_iterator.xhtml | 108 +- documentation/classarm__compute_1_1_kernel.xhtml | 9 +- documentation/classarm__compute_1_1_lut.xhtml | 9 +- .../classarm__compute_1_1_lut_allocator.xhtml | 9 +- .../classarm__compute_1_1_multi_h_o_g.xhtml | 11 +- .../classarm__compute_1_1_multi_image.xhtml | 9 +- .../classarm__compute_1_1_multi_image_info.xhtml | 9 +- ...sarm__compute_1_1_n_e_absolute_difference.xhtml | 9 +- ...ompute_1_1_n_e_absolute_difference_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_accumulate.xhtml | 9 +- ...assarm__compute_1_1_n_e_accumulate_kernel.xhtml | 9 +- ...ssarm__compute_1_1_n_e_accumulate_squared.xhtml | 9 +- ...compute_1_1_n_e_accumulate_squared_kernel.xhtml | 9 +- ...sarm__compute_1_1_n_e_accumulate_weighted.xhtml | 9 +- ..._1_1_n_e_accumulate_weighted_f_p16_kernel.xhtml | 9 +- ...ompute_1_1_n_e_accumulate_weighted_kernel.xhtml | 9 +- ...lassarm__compute_1_1_n_e_activation_layer.xhtml | 13 +- ...__compute_1_1_n_e_activation_layer_kernel.xhtml | 13 +- ...sarm__compute_1_1_n_e_arithmetic_addition.xhtml | 19 +- ...ompute_1_1_n_e_arithmetic_addition_kernel.xhtml | 15 +- ...m__compute_1_1_n_e_arithmetic_subtraction.xhtml | 19 +- 
...ute_1_1_n_e_arithmetic_subtraction_kernel.xhtml | 15 +- ...m__compute_1_1_n_e_batch_normalization_layer.js | 6 + ...compute_1_1_n_e_batch_normalization_layer.xhtml | 300 + ..._n_e_batch_normalization_layer__coll__graph.map | 3 + ..._n_e_batch_normalization_layer__coll__graph.md5 | 1 + ..._n_e_batch_normalization_layer__coll__graph.svg | 31 + ...ute_1_1_n_e_batch_normalization_layer_kernel.js | 11 + ..._1_1_n_e_batch_normalization_layer_kernel.xhtml | 458 + ...ch_normalization_layer_kernel__coll__graph.map} | 6 +- ...tch_normalization_layer_kernel__coll__graph.md5 | 1 + ...ch_normalization_layer_kernel__coll__graph.svg} | 32 +- .../classarm__compute_1_1_n_e_bitwise_and.xhtml | 9 +- ...ssarm__compute_1_1_n_e_bitwise_and_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_bitwise_not.xhtml | 9 +- ...ssarm__compute_1_1_n_e_bitwise_not_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_bitwise_or.xhtml | 9 +- ...assarm__compute_1_1_n_e_bitwise_or_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_bitwise_xor.xhtml | 9 +- ...ssarm__compute_1_1_n_e_bitwise_xor_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_box3x3.xhtml | 9 +- ...sarm__compute_1_1_n_e_box3x3_f_p16_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_box3x3_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_canny_edge.xhtml | 11 +- ...classarm__compute_1_1_n_e_channel_combine.xhtml | 9 +- ...m__compute_1_1_n_e_channel_combine_kernel.xhtml | 9 +- ...classarm__compute_1_1_n_e_channel_extract.xhtml | 9 +- ...m__compute_1_1_n_e_channel_extract_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_col2_im_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_color_convert.xhtml | 9 +- ...arm__compute_1_1_n_e_color_convert_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_convolution3x3.xhtml | 11 +- ...ssarm__compute_1_1_n_e_convolution_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_convolution_layer.js | 2 +- ...assarm__compute_1_1_n_e_convolution_layer.xhtml | 43 +- ...te_1_1_n_e_convolution_layer_reshape_weights.js 
| 6 + ...1_1_n_e_convolution_layer_reshape_weights.xhtml | 280 + ...volution_layer_reshape_weights__coll__graph.map | 3 + ...volution_layer_reshape_weights__coll__graph.md5 | 1 + ...volution_layer_reshape_weights__coll__graph.svg | 32 + ...n_e_convolution_layer_weights_reshape_kernel.js | 11 - ...n_layer_weights_reshape_kernel__coll__graph.md5 | 1 - ...n_layer_weights_reshape_kernel__coll__graph.svg | 45 - ...rm__compute_1_1_n_e_convolution_rectangle.xhtml | 9 +- ...pute_1_1_n_e_convolution_rectangle_kernel.xhtml | 9 +- ...ssarm__compute_1_1_n_e_convolution_square.xhtml | 11 +- ...te_1_1_n_e_cumulative_distribution_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_depth_concatenate.js | 6 + ...assarm__compute_1_1_n_e_depth_concatenate.xhtml | 267 + ...pute_1_1_n_e_depth_concatenate__coll__graph.map | 3 + ...pute_1_1_n_e_depth_concatenate__coll__graph.md5 | 1 + ...pute_1_1_n_e_depth_concatenate__coll__graph.svg | 31 + ...rm__compute_1_1_n_e_depth_concatenate_kernel.js | 12 + ..._compute_1_1_n_e_depth_concatenate_kernel.xhtml | 462 + ..._n_e_depth_concatenate_kernel__coll__graph.map} | 6 +- ...1_n_e_depth_concatenate_kernel__coll__graph.md5 | 1 + ...1_n_e_depth_concatenate_kernel__coll__graph.svg | 44 + .../classarm__compute_1_1_n_e_depth_convert.xhtml | 17 +- ...arm__compute_1_1_n_e_depth_convert_kernel.xhtml | 23 +- .../classarm__compute_1_1_n_e_derivative.xhtml | 11 +- ...assarm__compute_1_1_n_e_derivative_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_dilate.xhtml | 9 +- .../classarm__compute_1_1_n_e_dilate_kernel.xhtml | 9 +- ...rm__compute_1_1_n_e_direct_convolution_layer.js | 6 + ..._compute_1_1_n_e_direct_convolution_layer.xhtml | 289 + ...1_n_e_direct_convolution_layer__coll__graph.map | 3 + ...1_n_e_direct_convolution_layer__coll__graph.md5 | 1 + ...1_n_e_direct_convolution_layer__coll__graph.svg | 31 + ...ect_convolution_layer_bias_accumulate_kernel.js | 11 + ..._convolution_layer_bias_accumulate_kernel.xhtml | 431 + 
...n_layer_bias_accumulate_kernel__coll__graph.map | 4 + ...n_layer_bias_accumulate_kernel__coll__graph.md5 | 1 + ...n_layer_bias_accumulate_kernel__coll__graph.svg | 45 + ...pute_1_1_n_e_direct_convolution_layer_kernel.js | 12 + ...e_1_1_n_e_direct_convolution_layer_kernel.xhtml | 465 + ...irect_convolution_layer_kernel__coll__graph.map | 4 + ...irect_convolution_layer_kernel__coll__graph.md5 | 1 + ...irect_convolution_layer_kernel__coll__graph.svg | 45 + ...e_1_1_n_e_edge_non_max_suppression_kernel.xhtml | 9 +- ...assarm__compute_1_1_n_e_edge_trace_kernel.xhtml | 9 +- ...ssarm__compute_1_1_n_e_equalize_histogram.xhtml | 11 +- .../classarm__compute_1_1_n_e_erode.xhtml | 9 +- .../classarm__compute_1_1_n_e_erode_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_fast_corners.xhtml | 11 +- ...sarm__compute_1_1_n_e_fast_corners_kernel.xhtml | 9 +- ...assarm__compute_1_1_n_e_fill_array_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_fill_border.xhtml | 13 +- ...ssarm__compute_1_1_n_e_fill_border_kernel.xhtml | 11 +- ..._compute_1_1_n_e_fill_inner_border_kernel.xhtml | 11 +- ...ssarm__compute_1_1_n_e_fully_connected_layer.js | 2 +- ...rm__compute_1_1_n_e_fully_connected_layer.xhtml | 41 +- ..._1_n_e_fully_connected_layer_reshape_weights.js | 6 + ...n_e_fully_connected_layer_reshape_weights.xhtml | 282 + ...onnected_layer_reshape_weights__coll__graph.map | 3 + ...onnected_layer_reshape_weights__coll__graph.md5 | 1 + ...onnected_layer_reshape_weights__coll__graph.svg | 32 + .../classarm__compute_1_1_n_e_g_e_m_m.xhtml | 15 +- ...rm__compute_1_1_n_e_g_e_m_m_interleave4x4.xhtml | 11 +- ...pute_1_1_n_e_g_e_m_m_interleave4x4_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_g_e_m_m_lowp.xhtml | 11 +- ...1_n_e_g_e_m_m_lowp_matrix_multiply_kernel.xhtml | 9 +- ...e_g_e_m_m_matrix_accumulate_biases_kernel.xhtml | 11 +- ...mpute_1_1_n_e_g_e_m_m_matrix_addition_kernel.js | 2 +- ...te_1_1_n_e_g_e_m_m_matrix_addition_kernel.xhtml | 21 +- 
...te_1_1_n_e_g_e_m_m_matrix_multiply_kernel.xhtml | 11 +- ...rm__compute_1_1_n_e_g_e_m_m_transpose1x_w.xhtml | 15 +- ...pute_1_1_n_e_g_e_m_m_transpose1x_w_kernel.xhtml | 23 +- .../classarm__compute_1_1_n_e_gaussian3x3.xhtml | 9 +- ...ssarm__compute_1_1_n_e_gaussian3x3_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_gaussian5x5.xhtml | 11 +- ...m__compute_1_1_n_e_gaussian5x5_hor_kernel.xhtml | 9 +- ...__compute_1_1_n_e_gaussian5x5_vert_kernel.xhtml | 9 +- ...lassarm__compute_1_1_n_e_gaussian_pyramid.xhtml | 9 +- ...rm__compute_1_1_n_e_gaussian_pyramid_half.xhtml | 11 +- ...mpute_1_1_n_e_gaussian_pyramid_hor_kernel.xhtml | 9 +- ...arm__compute_1_1_n_e_gaussian_pyramid_orb.xhtml | 11 +- ...pute_1_1_n_e_gaussian_pyramid_vert_kernel.xhtml | 9 +- ...rm__compute_1_1_n_e_gradient_f_p16_kernel.xhtml | 9 +- ...classarm__compute_1_1_n_e_gradient_kernel.xhtml | 9 +- ..._1_1_n_e_h_o_g_block_normalization_kernel.xhtml | 9 +- ...lassarm__compute_1_1_n_e_h_o_g_descriptor.xhtml | 11 +- .../classarm__compute_1_1_n_e_h_o_g_detector.xhtml | 12 +- ...rm__compute_1_1_n_e_h_o_g_detector_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_h_o_g_gradient.xhtml | 11 +- ...rm__compute_1_1_n_e_h_o_g_multi_detection.xhtml | 15 +- ..._1_1_n_e_h_o_g_non_maxima_suppression_kernel.js | 11 - ..._non_maxima_suppression_kernel__coll__graph.md5 | 1 - ..._1_1_n_e_h_o_g_orientation_binning_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_harris_corners.xhtml | 11 +- ...compute_1_1_n_e_harris_score_f_p16_kernel.xhtml | 11 +- ...sarm__compute_1_1_n_e_harris_score_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_histogram.xhtml | 11 +- ...lassarm__compute_1_1_n_e_histogram_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_im2_col_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_integral_image.xhtml | 9 +- ...rm__compute_1_1_n_e_integral_image_kernel.xhtml | 9 +- ...ssarm__compute_1_1_n_e_l_k_tracker_kernel.xhtml | 9 +- ...assarm__compute_1_1_n_e_laplacian_pyramid.xhtml | 11 +- 
...rm__compute_1_1_n_e_laplacian_reconstruct.xhtml | 11 +- ...arm__compute_1_1_n_e_locally_connected_layer.js | 6 + ...__compute_1_1_n_e_locally_connected_layer.xhtml | 290 + ..._1_n_e_locally_connected_layer__coll__graph.map | 3 + ..._1_n_e_locally_connected_layer__coll__graph.md5 | 1 + ..._1_n_e_locally_connected_layer__coll__graph.svg | 31 + ...n_e_locally_connected_matrix_multiply_kernel.js | 10 + ..._locally_connected_matrix_multiply_kernel.xhtml | 402 + ...nnected_matrix_multiply_kernel__coll__graph.map | 4 + ...nnected_matrix_multiply_kernel__coll__graph.md5 | 1 + ...nnected_matrix_multiply_kernel__coll__graph.svg | 45 + ...arm__compute_1_1_n_e_logits1_d_max_kernel.xhtml | 11 +- ...rm__compute_1_1_n_e_logits1_d_norm_kernel.xhtml | 13 +- ...mpute_1_1_n_e_logits1_d_shift_exp_sum_kernel.js | 1 - ...te_1_1_n_e_logits1_d_shift_exp_sum_kernel.xhtml | 47 +- .../classarm__compute_1_1_n_e_magnitude.xhtml | 9 +- ...pute_1_1_n_e_magnitude_phase_f_p16_kernel.xhtml | 9 +- ...m__compute_1_1_n_e_magnitude_phase_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_mean_std_dev.xhtml | 11 +- ...sarm__compute_1_1_n_e_mean_std_dev_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_median3x3.xhtml | 9 +- ...lassarm__compute_1_1_n_e_median3x3_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_min_max_kernel.xhtml | 9 +- ...lassarm__compute_1_1_n_e_min_max_location.xhtml | 13 +- ...__compute_1_1_n_e_min_max_location_kernel.xhtml | 9 +- ...assarm__compute_1_1_n_e_non_linear_filter.xhtml | 9 +- ..._compute_1_1_n_e_non_linear_filter_kernel.xhtml | 9 +- ...compute_1_1_n_e_non_maxima_suppression3x3.xhtml | 11 +- ..._e_non_maxima_suppression3x3_f_p16_kernel.xhtml | 11 +- ..._1_1_n_e_non_maxima_suppression3x3_kernel.xhtml | 11 +- ...sarm__compute_1_1_n_e_normalization_layer.xhtml | 13 +- ...ompute_1_1_n_e_normalization_layer_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_optical_flow.xhtml | 11 +- .../classarm__compute_1_1_n_e_phase.xhtml | 9 +- 
...compute_1_1_n_e_pixel_wise_multiplication.xhtml | 15 +- ..._1_1_n_e_pixel_wise_multiplication_kernel.xhtml | 15 +- .../classarm__compute_1_1_n_e_pooling_layer.xhtml | 13 +- ...arm__compute_1_1_n_e_pooling_layer_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_remap.xhtml | 9 +- .../classarm__compute_1_1_n_e_remap_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_scale.xhtml | 9 +- .../classarm__compute_1_1_n_e_scale_kernel.xhtml | 13 +- .../classarm__compute_1_1_n_e_scharr3x3.xhtml | 9 +- ...lassarm__compute_1_1_n_e_scharr3x3_kernel.xhtml | 9 +- ..._1_1_n_e_separable_convolution_hor_kernel.xhtml | 9 +- ...1_1_n_e_separable_convolution_vert_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_sobel3x3.xhtml | 9 +- ...classarm__compute_1_1_n_e_sobel3x3_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_sobel5x5.xhtml | 11 +- ...sarm__compute_1_1_n_e_sobel5x5_hor_kernel.xhtml | 9 +- ...arm__compute_1_1_n_e_sobel5x5_vert_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_sobel7x7.xhtml | 11 +- ...sarm__compute_1_1_n_e_sobel7x7_hor_kernel.xhtml | 9 +- ...arm__compute_1_1_n_e_sobel7x7_vert_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_softmax_layer.xhtml | 17 +- .../classarm__compute_1_1_n_e_table_lookup.xhtml | 13 +- ...sarm__compute_1_1_n_e_table_lookup_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_threshold.xhtml | 9 +- ...lassarm__compute_1_1_n_e_threshold_kernel.xhtml | 9 +- .../classarm__compute_1_1_n_e_transpose.xhtml | 11 +- ...lassarm__compute_1_1_n_e_transpose_kernel.xhtml | 11 +- .../classarm__compute_1_1_n_e_warp_affine.xhtml | 9 +- ...ssarm__compute_1_1_n_e_warp_affine_kernel.xhtml | 9 +- ...lassarm__compute_1_1_n_e_warp_perspective.xhtml | 9 +- ...__compute_1_1_n_e_warp_perspective_kernel.xhtml | 9 +- ...sarm__compute_1_1_n_e_weights_reshape_kernel.js | 11 + ...__compute_1_1_n_e_weights_reshape_kernel.xhtml} | 111 +- ...1_1_n_e_weights_reshape_kernel__coll__graph.map | 4 + ...1_1_n_e_weights_reshape_kernel__coll__graph.md5 | 1 + 
...1_1_n_e_weights_reshape_kernel__coll__graph.svg | 44 + ...arm__compute_1_1_normalization_layer_info.xhtml | 69 +- .../classarm__compute_1_1_o_m_p_scheduler.js | 6 + .../classarm__compute_1_1_o_m_p_scheduler.xhtml | 316 + ...m__compute_1_1_o_m_p_scheduler__coll__graph.map | 3 + ...m__compute_1_1_o_m_p_scheduler__coll__graph.md5 | 1 + ...m__compute_1_1_o_m_p_scheduler__coll__graph.svg | 31 + .../classarm__compute_1_1_pad_stride_info.xhtml | 35 +- documentation/classarm__compute_1_1_pixel_value.js | 2 + .../classarm__compute_1_1_pixel_value.xhtml | 102 +- .../classarm__compute_1_1_pooling_layer_info.xhtml | 41 +- documentation/classarm__compute_1_1_program.xhtml | 9 +- documentation/classarm__compute_1_1_pyramid.xhtml | 9 +- .../classarm__compute_1_1_pyramid_info.xhtml | 9 +- documentation/classarm__compute_1_1_scheduler.js | 9 + .../classarm__compute_1_1_scheduler.xhtml | 358 + ...lassarm__compute_1_1_single_thread_scheduler.js | 6 + ...sarm__compute_1_1_single_thread_scheduler.xhtml | 312 + ...te_1_1_single_thread_scheduler__coll__graph.map | 3 + ...te_1_1_single_thread_scheduler__coll__graph.md5 | 1 + ...te_1_1_single_thread_scheduler__coll__graph.svg | 31 + documentation/classarm__compute_1_1_size2_d.xhtml | 9 +- documentation/classarm__compute_1_1_steps.js | 1 + documentation/classarm__compute_1_1_steps.xhtml | 63 +- documentation/classarm__compute_1_1_strides.js | 1 + documentation/classarm__compute_1_1_strides.xhtml | 63 +- documentation/classarm__compute_1_1_sub_tensor.js | 13 + .../classarm__compute_1_1_sub_tensor.xhtml | 476 + ...assarm__compute_1_1_sub_tensor__coll__graph.map | 3 + ...assarm__compute_1_1_sub_tensor__coll__graph.md5 | 1 + ...assarm__compute_1_1_sub_tensor__coll__graph.svg | 31 + .../classarm__compute_1_1_sub_tensor_info.js | 35 + .../classarm__compute_1_1_sub_tensor_info.xhtml | 1359 ++ ...m__compute_1_1_sub_tensor_info__coll__graph.map | 3 + ...m__compute_1_1_sub_tensor_info__coll__graph.md5 | 1 + 
...m__compute_1_1_sub_tensor_info__coll__graph.svg | 31 + documentation/classarm__compute_1_1_tensor.js | 4 +- documentation/classarm__compute_1_1_tensor.xhtml | 43 +- .../classarm__compute_1_1_tensor_allocator.xhtml | 13 +- documentation/classarm__compute_1_1_tensor_info.js | 61 +- .../classarm__compute_1_1_tensor_info.xhtml | 816 +- ...ssarm__compute_1_1_tensor_info__coll__graph.map | 3 + ...ssarm__compute_1_1_tensor_info__coll__graph.md5 | 1 + ...ssarm__compute_1_1_tensor_info__coll__graph.svg | 31 + .../classarm__compute_1_1_tensor_shape.js | 3 + .../classarm__compute_1_1_tensor_shape.xhtml | 184 +- .../classarm__compute_1_1_weights_info.js | 7 + .../classarm__compute_1_1_weights_info.xhtml | 285 + documentation/classarm__compute_1_1_window.js | 4 +- documentation/classarm__compute_1_1_window.xhtml | 222 +- ...lassarm__compute_1_1_window_1_1_dimension.xhtml | 15 +- ...arm__compute_1_1detail_1_1compare__dimension.js | 5 + ...__compute_1_1detail_1_1compare__dimension.xhtml | 257 + ...rm__compute_1_1test_1_1_activation_functions.js | 5 + ..._compute_1_1test_1_1_activation_functions.xhtml | 245 + ...1test_1_1_activation_functions__coll__graph.map | 4 + ...1test_1_1_activation_functions__coll__graph.md5 | 1 + ...1test_1_1_activation_functions__coll__graph.svg | 48 + ...ute_1_1test_1_1_activation_layer_data_object.js | 6 + ..._1_1test_1_1_activation_layer_data_object.xhtml | 228 + ...1_activation_layer_data_object__coll__graph.map | 6 + ...1_activation_layer_data_object__coll__graph.md5 | 1 + ...1_activation_layer_data_object__coll__graph.svg | 73 + ..._1test_1_1_alex_net_activation_layer_dataset.js | 5 + ...est_1_1_alex_net_activation_layer_dataset.xhtml | 231 + ...x_net_activation_layer_dataset__coll__graph.map | 3 + ...x_net_activation_layer_dataset__coll__graph.md5 | 1 + ...x_net_activation_layer_dataset__coll__graph.svg | 31 + ...1test_1_1_alex_net_convolution_layer_dataset.js | 5 + ...st_1_1_alex_net_convolution_layer_dataset.xhtml | 234 + 
..._net_convolution_layer_dataset__coll__graph.map | 3 + ..._net_convolution_layer_dataset__coll__graph.md5 | 1 + ..._net_convolution_layer_dataset__coll__graph.svg | 31 + ...t_1_1_alex_net_fully_connected_layer_dataset.js | 5 + ..._1_alex_net_fully_connected_layer_dataset.xhtml | 230 + ..._fully_connected_layer_dataset__coll__graph.map | 3 + ..._fully_connected_layer_dataset__coll__graph.md5 | 1 + ..._fully_connected_layer_dataset__coll__graph.svg | 32 + ...est_1_1_alex_net_normalization_layer_dataset.js | 5 + ..._1_1_alex_net_normalization_layer_dataset.xhtml | 231 + ...et_normalization_layer_dataset__coll__graph.map | 3 + ...et_normalization_layer_dataset__coll__graph.md5 | 1 + ...et_normalization_layer_dataset__coll__graph.svg | 32 + ...e_1_1test_1_1_alex_net_pooling_layer_dataset.js | 5 + ..._1test_1_1_alex_net_pooling_layer_dataset.xhtml | 231 + ...alex_net_pooling_layer_dataset__coll__graph.map | 3 + ...alex_net_pooling_layer_dataset__coll__graph.md5 | 1 + ...alex_net_pooling_layer_dataset__coll__graph.svg | 31 + ...classarm__compute_1_1test_1_1_all_data_types.js | 5 + ...ssarm__compute_1_1test_1_1_all_data_types.xhtml | 247 + ...ute_1_1test_1_1_all_data_types__coll__graph.map | 4 + ...ute_1_1test_1_1_all_data_types__coll__graph.md5 | 1 + ...ute_1_1test_1_1_all_data_types__coll__graph.svg | 45 + ...st_1_1_batch_normalization_layer_data_object.js | 8 + ...1_1_batch_normalization_layer_data_object.xhtml | 289 + ...ormalization_layer_data_object__coll__graph.map | 5 + ...ormalization_layer_data_object__coll__graph.md5 | 1 + ...ormalization_layer_data_object__coll__graph.svg | 61 + .../classarm__compute_1_1test_1_1_border_modes.js | 8 + ...lassarm__compute_1_1test_1_1_border_modes.xhtml | 286 + ...assarm__compute_1_1test_1_1_c_n_n_data_types.js | 5 + ...arm__compute_1_1test_1_1_c_n_n_data_types.xhtml | 235 + ...e_1_1test_1_1_c_n_n_data_types__coll__graph.map | 4 + ...e_1_1test_1_1_c_n_n_data_types__coll__graph.md5 | 1 + 
...e_1_1test_1_1_c_n_n_data_types__coll__graph.svg | 45 + ...ute_1_1test_1_1_c_n_n_fixed_point_data_types.js | 5 + ..._1_1test_1_1_c_n_n_fixed_point_data_types.xhtml | 234 + ...1_c_n_n_fixed_point_data_types__coll__graph.map | 4 + ...1_c_n_n_fixed_point_data_types__coll__graph.md5 | 1 + ...1_c_n_n_fixed_point_data_types__coll__graph.svg | 45 + ...__compute_1_1test_1_1_c_n_n_float_data_types.js | 5 + ...ompute_1_1test_1_1_c_n_n_float_data_types.xhtml | 234 + ...est_1_1_c_n_n_float_data_types__coll__graph.map | 4 + ...est_1_1_c_n_n_float_data_types__coll__graph.md5 | 1 + ...est_1_1_c_n_n_float_data_types__coll__graph.svg | 45 + ...assarm__compute_1_1test_1_1_convert_policies.js | 8 + ...arm__compute_1_1test_1_1_convert_policies.xhtml | 286 + ...te_1_1test_1_1_convolution_layer_data_object.js | 10 + ...1_1test_1_1_convolution_layer_data_object.xhtml | 329 + ..._convolution_layer_data_object__coll__graph.map | 6 + ..._convolution_layer_data_object__coll__graph.md5 | 1 + ..._convolution_layer_data_object__coll__graph.svg | 76 + .../classarm__compute_1_1test_1_1_data_types.js | 8 + .../classarm__compute_1_1test_1_1_data_types.xhtml | 287 + ...ompute_1_1test_1_1_direct_convolution_shapes.js | 4 + ...ute_1_1test_1_1_direct_convolution_shapes.xhtml | 208 + ..._1_1_direct_convolution_shapes__coll__graph.map | 4 + ..._1_1_direct_convolution_shapes__coll__graph.md5 | 1 + ..._1_1_direct_convolution_shapes__coll__graph.svg | 45 + ...__compute_1_1test_1_1_fixed_point_data_types.js | 5 + ...ompute_1_1test_1_1_fixed_point_data_types.xhtml | 235 + ...est_1_1_fixed_point_data_types__coll__graph.map | 4 + ...est_1_1_fixed_point_data_types__coll__graph.md5 | 1 + ...est_1_1_fixed_point_data_types__coll__graph.svg | 45 + ...assarm__compute_1_1test_1_1_float_data_types.js | 5 + ...arm__compute_1_1test_1_1_float_data_types.xhtml | 236 + ...e_1_1test_1_1_float_data_types__coll__graph.map | 4 + ...e_1_1test_1_1_float_data_types__coll__graph.md5 | 1 + 
...e_1_1test_1_1_float_data_types__coll__graph.svg | 45 + ..._1test_1_1_fully_connected_layer_data_object.js | 11 + ...est_1_1_fully_connected_layer_data_object.xhtml | 338 + ...ly_connected_layer_data_object__coll__graph.map | 5 + ...ly_connected_layer_data_object__coll__graph.md5 | 1 + ...ly_connected_layer_data_object__coll__graph.svg | 62 + ...arm__compute_1_1test_1_1_g_e_m_m_data_object.js | 11 + ...__compute_1_1test_1_1_g_e_m_m_data_object.xhtml | 346 + ..._1test_1_1_g_e_m_m_data_object__coll__graph.map | 5 + ..._1test_1_1_g_e_m_m_data_object__coll__graph.md5 | 1 + ..._1test_1_1_g_e_m_m_data_object__coll__graph.svg | 62 + ...lassarm__compute_1_1test_1_1_generic_dataset.js | 8 + ...sarm__compute_1_1test_1_1_generic_dataset.xhtml | 287 + ...est_1_1_goog_le_net_activation_layer_dataset.js | 5 + ..._1_1_goog_le_net_activation_layer_dataset.xhtml | 231 + ...e_net_activation_layer_dataset__coll__graph.map | 3 + ...e_net_activation_layer_dataset__coll__graph.md5 | 1 + ...e_net_activation_layer_dataset__coll__graph.svg | 33 + ...t_1_1_goog_le_net_convolution_layer_dataset1.js | 5 + ..._1_goog_le_net_convolution_layer_dataset1.xhtml | 235 + ...net_convolution_layer_dataset1__coll__graph.map | 3 + ...net_convolution_layer_dataset1__coll__graph.md5 | 1 + ...net_convolution_layer_dataset1__coll__graph.svg | 33 + ...t_1_1_goog_le_net_convolution_layer_dataset2.js | 5 + ..._1_goog_le_net_convolution_layer_dataset2.xhtml | 234 + ...net_convolution_layer_dataset2__coll__graph.map | 3 + ...net_convolution_layer_dataset2__coll__graph.md5 | 1 + ...net_convolution_layer_dataset2__coll__graph.svg | 33 + ..._1_goog_le_net_fully_connected_layer_dataset.js | 5 + ...goog_le_net_fully_connected_layer_dataset.xhtml | 230 + ..._fully_connected_layer_dataset__coll__graph.map | 3 + ..._fully_connected_layer_dataset__coll__graph.md5 | 1 + ..._fully_connected_layer_dataset__coll__graph.svg | 32 + ...ute_1_1test_1_1_goog_le_net_g_e_m_m_dataset1.js | 5 + 
..._1_1test_1_1_goog_le_net_g_e_m_m_dataset1.xhtml | 229 + ...1_goog_le_net_g_e_m_m_dataset1__coll__graph.map | 4 + ...1_goog_le_net_g_e_m_m_dataset1__coll__graph.md5 | 1 + ...1_goog_le_net_g_e_m_m_dataset1__coll__graph.svg | 47 + ...ute_1_1test_1_1_goog_le_net_g_e_m_m_dataset2.js | 5 + ..._1_1test_1_1_goog_le_net_g_e_m_m_dataset2.xhtml | 229 + ...1_goog_le_net_g_e_m_m_dataset2__coll__graph.map | 4 + ...1_goog_le_net_g_e_m_m_dataset2__coll__graph.md5 | 1 + ...1_goog_le_net_g_e_m_m_dataset2__coll__graph.svg | 47 + ..._1_1_goog_le_net_normalization_layer_dataset.js | 5 + ...1_goog_le_net_normalization_layer_dataset.xhtml | 231 + ...et_normalization_layer_dataset__coll__graph.map | 3 + ...et_normalization_layer_dataset__coll__graph.md5 | 1 + ...et_normalization_layer_dataset__coll__graph.svg | 32 + ..._1test_1_1_goog_le_net_pooling_layer_dataset.js | 5 + ...est_1_1_goog_le_net_pooling_layer_dataset.xhtml | 232 + ...g_le_net_pooling_layer_dataset__coll__graph.map | 3 + ...g_le_net_pooling_layer_dataset__coll__graph.md5 | 1 + ...g_le_net_pooling_layer_dataset__coll__graph.svg | 32 + .../classarm__compute_1_1test_1_1_i_accessor.js | 14 + .../classarm__compute_1_1test_1_1_i_accessor.xhtml | 512 + .../classarm__compute_1_1test_1_1_image_dataset.js | 8 + ...assarm__compute_1_1test_1_1_image_dataset.xhtml | 287 + ...__compute_1_1test_1_1_interpolation_policies.js | 8 + ...ompute_1_1test_1_1_interpolation_policies.xhtml | 286 + ...test_1_1_large_fully_connected_layer_dataset.js | 5 + ...t_1_1_large_fully_connected_layer_dataset.xhtml | 230 + ..._fully_connected_layer_dataset__coll__graph.map | 3 + ..._fully_connected_layer_dataset__coll__graph.md5 | 1 + ..._fully_connected_layer_dataset__coll__graph.svg | 32 + ...m__compute_1_1test_1_1_large_g_e_m_m_dataset.js | 5 + ...compute_1_1test_1_1_large_g_e_m_m_dataset.xhtml | 227 + ...test_1_1_large_g_e_m_m_dataset__coll__graph.map | 4 + ...test_1_1_large_g_e_m_m_dataset__coll__graph.md5 | 1 + 
...test_1_1_large_g_e_m_m_dataset__coll__graph.svg | 47 + .../classarm__compute_1_1test_1_1_large_images.js | 4 + ...lassarm__compute_1_1test_1_1_large_images.xhtml | 206 + ...mpute_1_1test_1_1_large_images__coll__graph.map | 4 + ...mpute_1_1test_1_1_large_images__coll__graph.md5 | 1 + ...mpute_1_1test_1_1_large_images__coll__graph.svg | 45 + .../classarm__compute_1_1test_1_1_large_shapes.js | 4 + ...lassarm__compute_1_1test_1_1_large_shapes.xhtml | 208 + ...mpute_1_1test_1_1_large_shapes__coll__graph.map | 4 + ...mpute_1_1test_1_1_large_shapes__coll__graph.md5 | 1 + ...mpute_1_1test_1_1_large_shapes__coll__graph.svg | 45 + ...1_1test_1_1_le_net5_activation_layer_dataset.js | 5 + ...test_1_1_le_net5_activation_layer_dataset.xhtml | 231 + ..._net5_activation_layer_dataset__coll__graph.map | 3 + ..._net5_activation_layer_dataset__coll__graph.md5 | 1 + ..._net5_activation_layer_dataset__coll__graph.svg | 31 + ..._1test_1_1_le_net5_convolution_layer_dataset.js | 5 + ...est_1_1_le_net5_convolution_layer_dataset.xhtml | 234 + ...net5_convolution_layer_dataset__coll__graph.map | 3 + ...net5_convolution_layer_dataset__coll__graph.md5 | 1 + ...net5_convolution_layer_dataset__coll__graph.svg | 31 + ...st_1_1_le_net5_fully_connected_layer_dataset.js | 5 + ...1_1_le_net5_fully_connected_layer_dataset.xhtml | 230 + ..._fully_connected_layer_dataset__coll__graph.map | 3 + ..._fully_connected_layer_dataset__coll__graph.md5 | 1 + ..._fully_connected_layer_dataset__coll__graph.svg | 32 + ...te_1_1test_1_1_le_net5_pooling_layer_dataset.js | 5 + ...1_1test_1_1_le_net5_pooling_layer_dataset.xhtml | 231 + ..._le_net5_pooling_layer_dataset__coll__graph.map | 3 + ..._le_net5_pooling_layer_dataset__coll__graph.md5 | 1 + ..._le_net5_pooling_layer_dataset__coll__graph.svg | 31 + ..._1_1test_1_1_normalization_layer_data_object.js | 6 + ...1test_1_1_normalization_layer_data_object.xhtml | 229 + ...ormalization_layer_data_object__coll__graph.map | 6 + 
...ormalization_layer_data_object__coll__graph.md5 | 1 + ...ormalization_layer_data_object__coll__graph.svg | 73 + ...arm__compute_1_1test_1_1_normalization_types.js | 8 + ...__compute_1_1test_1_1_normalization_types.xhtml | 286 + ...ompute_1_1test_1_1_pooling_layer_data_object.js | 8 + ...ute_1_1test_1_1_pooling_layer_data_object.xhtml | 291 + ..._1_1_pooling_layer_data_object__coll__graph.map | 6 + ..._1_1_pooling_layer_data_object__coll__graph.md5 | 1 + ..._1_1_pooling_layer_data_object__coll__graph.svg | 74 + ...lassarm__compute_1_1test_1_1_program_options.js | 8 + ...sarm__compute_1_1test_1_1_program_options.xhtml | 312 + ...1_1_random_batch_normalization_layer_dataset.js | 5 + ..._random_batch_normalization_layer_dataset.xhtml | 230 + ...ch_normalization_layer_dataset__coll__graph.map | 3 + ...ch_normalization_layer_dataset__coll__graph.md5 | 1 + ...ch_normalization_layer_dataset__coll__graph.svg | 32 + ...ute_1_1test_1_1_random_pooling_layer_dataset.js | 5 + ..._1_1test_1_1_random_pooling_layer_dataset.xhtml | 232 + ...1_random_pooling_layer_dataset__coll__graph.map | 3 + ...1_random_pooling_layer_dataset__coll__graph.md5 | 1 + ...1_random_pooling_layer_dataset__coll__graph.svg | 31 + .../classarm__compute_1_1test_1_1_raw_tensor.js | 26 + .../classarm__compute_1_1test_1_1_raw_tensor.xhtml | 879 ++ ...ssarm__compute_1_1test_1_1_rounding_policies.js | 8 + ...rm__compute_1_1test_1_1_rounding_policies.xhtml | 286 + .../classarm__compute_1_1test_1_1_shape_dataset.js | 8 + ...assarm__compute_1_1test_1_1_shape_dataset.xhtml | 287 + ...ssarm__compute_1_1test_1_1_signed_data_types.js | 5 + ...rm__compute_1_1test_1_1_signed_data_types.xhtml | 237 + ..._1_1test_1_1_signed_data_types__coll__graph.map | 4 + ..._1_1test_1_1_signed_data_types__coll__graph.md5 | 1 + ..._1_1test_1_1_signed_data_types__coll__graph.svg | 45 + ...classarm__compute_1_1test_1_1_small1_d_shape.js | 4 + ...ssarm__compute_1_1test_1_1_small1_d_shape.xhtml | 208 + 
...ute_1_1test_1_1_small1_d_shape__coll__graph.map | 4 + ...ute_1_1test_1_1_small1_d_shape__coll__graph.md5 | 1 + ...ute_1_1test_1_1_small1_d_shape__coll__graph.svg | 45 + ..._1_1test_1_1_small_convolution_layer_dataset.js | 5 + ...1test_1_1_small_convolution_layer_dataset.xhtml | 234 + ...mall_convolution_layer_dataset__coll__graph.map | 3 + ...mall_convolution_layer_dataset__coll__graph.md5 | 1 + ...mall_convolution_layer_dataset__coll__graph.svg | 31 + ...test_1_1_small_fully_connected_layer_dataset.js | 5 + ...t_1_1_small_fully_connected_layer_dataset.xhtml | 230 + ..._fully_connected_layer_dataset__coll__graph.map | 3 + ..._fully_connected_layer_dataset__coll__graph.md5 | 1 + ..._fully_connected_layer_dataset__coll__graph.svg | 32 + ...m__compute_1_1test_1_1_small_g_e_m_m_dataset.js | 5 + ...compute_1_1test_1_1_small_g_e_m_m_dataset.xhtml | 227 + ...test_1_1_small_g_e_m_m_dataset__coll__graph.map | 4 + ...test_1_1_small_g_e_m_m_dataset__coll__graph.md5 | 1 + ...test_1_1_small_g_e_m_m_dataset__coll__graph.svg | 47 + .../classarm__compute_1_1test_1_1_small_images.js | 4 + ...lassarm__compute_1_1test_1_1_small_images.xhtml | 206 + ...mpute_1_1test_1_1_small_images__coll__graph.map | 4 + ...mpute_1_1test_1_1_small_images__coll__graph.md5 | 1 + ...mpute_1_1test_1_1_small_images__coll__graph.svg | 45 + .../classarm__compute_1_1test_1_1_small_shapes.js | 4 + ...lassarm__compute_1_1test_1_1_small_shapes.xhtml | 208 + ...mpute_1_1test_1_1_small_shapes__coll__graph.map | 4 + ...mpute_1_1test_1_1_small_shapes__coll__graph.md5 | 1 + ...mpute_1_1test_1_1_small_shapes__coll__graph.svg | 45 + .../classarm__compute_1_1test_1_1_tensor_cache.js | 7 + ...lassarm__compute_1_1test_1_1_tensor_cache.xhtml | 313 + ...classarm__compute_1_1test_1_1_tensor_library.js | 26 + ...ssarm__compute_1_1test_1_1_tensor_library.xhtml | 1354 ++ ...m__compute_1_1test_1_1_threshold_data_object.js | 10 + ...compute_1_1test_1_1_threshold_data_object.xhtml | 320 + 
...ssarm__compute_1_1test_1_1_threshold_dataset.js | 5 + ...rm__compute_1_1test_1_1_threshold_dataset.xhtml | 232 + ..._1_1test_1_1_threshold_dataset__coll__graph.map | 4 + ..._1_1test_1_1_threshold_dataset__coll__graph.md5 | 1 + ..._1_1test_1_1_threshold_dataset__coll__graph.svg | 47 + ...arm__compute_1_1test_1_1_unsigned_data_types.js | 5 + ...__compute_1_1test_1_1_unsigned_data_types.xhtml | 237 + ..._1test_1_1_unsigned_data_types__coll__graph.map | 4 + ..._1test_1_1_unsigned_data_types__coll__graph.md5 | 1 + ..._1test_1_1_unsigned_data_types__coll__graph.svg | 45 + ...te_1_1test_1_1benchmark_1_1_activation_layer.js | 7 + ...1_1test_1_1benchmark_1_1_activation_layer.xhtml | 268 + ...benchmark_1_1_activation_layer__coll__graph.map | 3 + ...benchmark_1_1_activation_layer__coll__graph.md5 | 1 + ...benchmark_1_1_activation_layer__coll__graph.svg | 44 + ...te_1_1test_1_1benchmark_1_1_alex_net_fixture.js | 7 + ...1_1test_1_1benchmark_1_1_alex_net_fixture.xhtml | 264 + ...benchmark_1_1_alex_net_fixture__coll__graph.map | 5 + ...benchmark_1_1_alex_net_fixture__coll__graph.md5 | 1 + ...benchmark_1_1_alex_net_fixture__coll__graph.svg | 92 + ...e_1_1test_1_1benchmark_1_1_convolution_layer.js | 7 + ..._1test_1_1benchmark_1_1_convolution_layer.xhtml | 271 + ...enchmark_1_1_convolution_layer__coll__graph.map | 3 + ...enchmark_1_1_convolution_layer__coll__graph.md5 | 1 + ...enchmark_1_1_convolution_layer__coll__graph.svg | 44 + ...mpute_1_1test_1_1benchmark_1_1_cycle_counter.js | 8 + ...te_1_1test_1_1benchmark_1_1_cycle_counter.xhtml | 324 + ...1_1benchmark_1_1_cycle_counter__coll__graph.map | 3 + ...1_1benchmark_1_1_cycle_counter__coll__graph.md5 | 1 + ...1_1benchmark_1_1_cycle_counter__coll__graph.svg | 31 + ...1test_1_1benchmark_1_1_fully_connected_layer.js | 7 + ...st_1_1benchmark_1_1_fully_connected_layer.xhtml | 271 + ...mark_1_1_fully_connected_layer__coll__graph.map | 3 + ...mark_1_1_fully_connected_layer__coll__graph.md5 | 1 + 
...mark_1_1_fully_connected_layer__coll__graph.svg | 44 + ...rm__compute_1_1test_1_1benchmark_1_1_g_e_m_m.js | 9 + ..._compute_1_1test_1_1benchmark_1_1_g_e_m_m.xhtml | 352 + ...1test_1_1benchmark_1_1_g_e_m_m__coll__graph.map | 3 + ...1test_1_1benchmark_1_1_g_e_m_m__coll__graph.md5 | 1 + ...1test_1_1benchmark_1_1_g_e_m_m__coll__graph.svg | 48 + ...1_1test_1_1benchmark_1_1_instruction_counter.js | 8 + ...test_1_1benchmark_1_1_instruction_counter.xhtml | 324 + ...chmark_1_1_instruction_counter__coll__graph.map | 3 + ...chmark_1_1_instruction_counter__coll__graph.md5 | 1 + ...chmark_1_1_instruction_counter__coll__graph.svg | 31 + ..._compute_1_1test_1_1benchmark_1_1_instrument.js | 15 + ...mpute_1_1test_1_1benchmark_1_1_instrument.xhtml | 438 + ..._1benchmark_1_1_instrument_1_1_i_measurement.js | 10 + ...enchmark_1_1_instrument_1_1_i_measurement.xhtml | 336 + ..._1_1benchmark_1_1_instrument_1_1_measurement.js | 5 + ...1benchmark_1_1_instrument_1_1_measurement.xhtml | 234 + ...1_1_instrument_1_1_measurement__coll__graph.map | 3 + ...1_1_instrument_1_1_measurement__coll__graph.md5 | 1 + ...1_1_instrument_1_1_measurement__coll__graph.svg | 31 + ...ute_1_1test_1_1benchmark_1_1_le_net5_fixture.js | 7 + ..._1_1test_1_1benchmark_1_1_le_net5_fixture.xhtml | 259 + ...1benchmark_1_1_le_net5_fixture__coll__graph.map | 3 + ...1benchmark_1_1_le_net5_fixture__coll__graph.md5 | 1 + ...1benchmark_1_1_le_net5_fixture__coll__graph.svg | 46 + ...1_1test_1_1benchmark_1_1_normalization_layer.js | 7 + ...test_1_1benchmark_1_1_normalization_layer.xhtml | 269 + ...chmark_1_1_normalization_layer__coll__graph.map | 3 + ...chmark_1_1_normalization_layer__coll__graph.md5 | 1 + ...chmark_1_1_normalization_layer__coll__graph.svg | 44 + ...mpute_1_1test_1_1benchmark_1_1_pooling_layer.js | 7 + ...te_1_1test_1_1benchmark_1_1_pooling_layer.xhtml | 269 + ...1_1benchmark_1_1_pooling_layer__coll__graph.map | 3 + ...1_1benchmark_1_1_pooling_layer__coll__graph.md5 | 1 + 
...1_1benchmark_1_1_pooling_layer__coll__graph.svg | 44 + ...m__compute_1_1test_1_1benchmark_1_1_profiler.js | 9 + ...compute_1_1test_1_1benchmark_1_1_profiler.xhtml | 297 + ...te_1_1test_1_1benchmark_1_1_wall_clock_timer.js | 7 + ...1_1test_1_1benchmark_1_1_wall_clock_timer.xhtml | 301 + ...benchmark_1_1_wall_clock_timer__coll__graph.map | 3 + ...benchmark_1_1_wall_clock_timer__coll__graph.md5 | 1 + ...benchmark_1_1_wall_clock_timer__coll__graph.svg | 31 + ...sarm__compute_1_1test_1_1cl_1_1_c_l_accessor.js | 19 + ...m__compute_1_1test_1_1cl_1_1_c_l_accessor.xhtml | 717 ++ ...1_1test_1_1cl_1_1_c_l_accessor__coll__graph.map | 3 + ...1_1test_1_1cl_1_1_c_l_accessor__coll__graph.md5 | 1 + ...1_1test_1_1cl_1_1_c_l_accessor__coll__graph.svg | 31 + ..._1_1fixed__point__arithmetic_1_1fixed__point.js | 21 + ...1fixed__point__arithmetic_1_1fixed__point.xhtml | 823 ++ ...mpute_1_1test_1_1model__objects_1_1_alex_net.js | 11 + ...te_1_1test_1_1model__objects_1_1_alex_net.xhtml | 419 + ...ompute_1_1test_1_1model__objects_1_1_le_net5.js | 8 + ...ute_1_1test_1_1model__objects_1_1_le_net5.xhtml | 335 + ...rm__compute_1_1test_1_1neon_1_1_n_e_accessor.js | 18 + ..._compute_1_1test_1_1neon_1_1_n_e_accessor.xhtml | 680 + ...1test_1_1neon_1_1_n_e_accessor__coll__graph.map | 3 + ...1test_1_1neon_1_1_n_e_accessor__coll__graph.md5 | 1 + ...1test_1_1neon_1_1_n_e_accessor__coll__graph.svg | 31 + ...1performance_1_1_performance_program_options.js | 4 + ...rformance_1_1_performance_program_options.xhtml | 199 + ..._1_performance_program_options__coll__graph.map | 3 + ..._1_performance_program_options__coll__graph.md5 | 1 + ..._1_performance_program_options__coll__graph.svg | 31 + ...mpute_1_1test_1_1validation_1_1_reference.xhtml | 1860 +++ ...1_1test_1_1validation_1_1_reference_c_p_p.xhtml | 1869 +++ ...validation_1_1_reference_c_p_p__coll__graph.map | 3 + ...validation_1_1_reference_c_p_p__coll__graph.md5 | 1 + ...validation_1_1_reference_c_p_p__coll__graph.svg | 31 + 
...rm__compute_1_1test_1_1validation_1_1_tensor.js | 20 + ..._compute_1_1test_1_1validation_1_1_tensor.xhtml | 690 ++ ..._1_1test_1_1validation_1_1_tensor_factory.xhtml | 195 + ...1_1validation_1_1_validation_program_options.js | 4 + ...validation_1_1_validation_program_options.xhtml | 199 + ...1_1_validation_program_options__coll__graph.map | 3 + ...1_1_validation_program_options__coll__graph.md5 | 1 + ...1_1_validation_program_options__coll__graph.svg | 31 + .../classarm__compute_1_1utils_1_1_p_p_m_loader.js | 8 + ...assarm__compute_1_1utils_1_1_p_p_m_loader.xhtml | 377 + documentation/classes.xhtml | 270 +- .../classtest__helpers_1_1_p_p_m_loader.js | 8 - .../classtest__helpers_1_1_p_p_m_loader.xhtml | 376 - documentation/color__convert_8cl.xhtml | 45 +- documentation/color__convert_8cl_source.xhtml | 13 +- documentation/concatenate_8cl.js | 4 + documentation/concatenate_8cl.xhtml | 265 + documentation/concatenate_8cl_source.xhtml | 139 + documentation/convolution3x3_8cl.xhtml | 11 +- documentation/convolution3x3_8cl_source.xhtml | 9 +- documentation/convolution5x5_8cl.xhtml | 9 +- documentation/convolution5x5_8cl_source.xhtml | 9 +- documentation/convolution7x7_8cl.xhtml | 9 +- documentation/convolution7x7_8cl_source.xhtml | 9 +- documentation/convolution9x9_8cl.xhtml | 9 +- documentation/convolution9x9_8cl_source.xhtml | 9 +- documentation/convolution__layer_8cl.js | 2 +- documentation/convolution__layer_8cl.xhtml | 45 +- documentation/convolution__layer_8cl_source.xhtml | 19 +- documentation/convolution__rectangle_8cl.xhtml | 9 +- .../convolution__rectangle_8cl_source.xhtml | 9 +- documentation/depth__convert_8cl.xhtml | 9 +- documentation/depth__convert_8cl_source.xhtml | 9 +- documentation/derivative_8cl.xhtml | 9 +- documentation/derivative_8cl_source.xhtml | 9 +- documentation/dilate_8cl.xhtml | 14 +- documentation/dilate_8cl_source.xhtml | 12 +- documentation/dir_000002_000003.xhtml | 11 +- documentation/dir_000004_000005.xhtml | 11 +- 
...000004_000006.xhtml => dir_000004_000008.xhtml} | 11 +- ...000005_000006.xhtml => dir_000005_000008.xhtml} | 11 +- documentation/dir_000006_000007.xhtml | 121 - documentation/dir_000007_000000.xhtml | 122 + ...000008_000000.xhtml => dir_000007_000035.xhtml} | 15 +- documentation/dir_000007_000036.xhtml | 122 + documentation/dir_000007_000037.xhtml | 122 + ...000006_000004.xhtml => dir_000008_000004.xhtml} | 9 +- ...000006_000005.xhtml => dir_000008_000005.xhtml} | 9 +- documentation/dir_000008_000009.xhtml | 122 + documentation/dir_000009_000001.xhtml | 121 - ...000007_000004.xhtml => dir_000009_000004.xhtml} | 9 +- documentation/dir_000010_000001.xhtml | 15 +- documentation/dir_000010_000011.xhtml | 121 - documentation/dir_000011_000000.xhtml | 122 + documentation/dir_000011_000001.xhtml | 121 - documentation/dir_000012_000001.xhtml | 122 + documentation/dir_000012_000013.xhtml | 122 + documentation/dir_000013_000001.xhtml | 15 +- documentation/dir_000013_000014.xhtml | 121 - documentation/dir_000014_000001.xhtml | 121 - documentation/dir_000015_000001.xhtml | 122 + documentation/dir_000015_000016.xhtml | 122 + documentation/dir_000016_000001.xhtml | 122 + ...000015_000000.xhtml => dir_000018_000000.xhtml} | 11 +- ...000015_000008.xhtml => dir_000018_000011.xhtml} | 11 +- documentation/dir_000023_000000.xhtml | 122 + documentation/dir_000023_000034.xhtml | 122 + documentation/dir_000023_000035.xhtml | 122 + documentation/dir_000023_000036.xhtml | 122 + documentation/dir_000023_000037.xhtml | 122 + documentation/dir_000024_000000.xhtml | 122 + documentation/dir_000024_000029.xhtml | 122 + documentation/dir_000024_000035.xhtml | 122 + documentation/dir_000024_000037.xhtml | 122 + documentation/dir_000025_000000.xhtml | 122 + documentation/dir_000025_000029.xhtml | 122 + documentation/dir_000025_000036.xhtml | 122 + documentation/dir_000025_000037.xhtml | 122 + documentation/dir_000026_000000.xhtml | 122 + documentation/dir_000026_000036.xhtml | 122 + 
documentation/dir_000026_000037.xhtml | 122 + documentation/dir_000027_000000.xhtml | 122 + documentation/dir_000027_000035.xhtml | 122 + documentation/dir_000027_000037.xhtml | 122 + documentation/dir_000028_000000.xhtml | 122 + documentation/dir_000028_000036.xhtml | 122 + documentation/dir_000028_000037.xhtml | 122 + documentation/dir_000029_000037.xhtml | 122 + documentation/dir_000030_000000.xhtml | 122 + documentation/dir_000030_000034.xhtml | 122 + documentation/dir_000030_000035.xhtml | 122 + documentation/dir_000030_000036.xhtml | 122 + documentation/dir_000031_000000.xhtml | 122 + documentation/dir_000031_000033.xhtml | 122 + documentation/dir_000031_000035.xhtml | 122 + documentation/dir_000032_000000.xhtml | 122 + documentation/dir_000032_000033.xhtml | 122 + documentation/dir_000032_000036.xhtml | 122 + documentation/dir_000033_000034.xhtml | 122 + documentation/dir_000035_000000.xhtml | 122 + documentation/dir_000036_000000.xhtml | 122 + documentation/dir_000037_000000.xhtml | 122 + documentation/dir_000038_000037.xhtml | 122 + documentation/dir_000039_000000.xhtml | 122 + documentation/dir_000039_000036.xhtml | 122 + documentation/dir_000040_000000.xhtml | 122 + .../dir_0538a82dae0483a21d0f1e9d7e0a45c6.js | 29 + .../dir_0538a82dae0483a21d0f1e9d7e0a45c6.xhtml | 197 + .../dir_0538a82dae0483a21d0f1e9d7e0a45c6_dep.map | 17 + .../dir_0538a82dae0483a21d0f1e9d7e0a45c6_dep.md5 | 1 + .../dir_0538a82dae0483a21d0f1e9d7e0a45c6_dep.svg | 142 + .../dir_0b3c54e6965035dc3ca2dd424a2d1395.js | 9 + .../dir_0b3c54e6965035dc3ca2dd424a2d1395.xhtml | 146 + .../dir_0b3c54e6965035dc3ca2dd424a2d1395_dep.map | 6 + .../dir_0b3c54e6965035dc3ca2dd424a2d1395_dep.md5 | 1 + .../dir_0b3c54e6965035dc3ca2dd424a2d1395_dep.svg | 45 + .../dir_0c108762cda89b68d4571cbbabdf46df.js | 7 + .../dir_0c108762cda89b68d4571cbbabdf46df.xhtml | 150 + .../dir_0c108762cda89b68d4571cbbabdf46df_dep.map | 6 + .../dir_0c108762cda89b68d4571cbbabdf46df_dep.md5 | 1 + 
.../dir_0c108762cda89b68d4571cbbabdf46df_dep.svg | 45 + .../dir_1633709b037eda5b8477677909f78129.js | 7 + .../dir_1633709b037eda5b8477677909f78129.xhtml | 15 +- .../dir_1633709b037eda5b8477677909f78129_dep.map | 2 +- .../dir_1633709b037eda5b8477677909f78129_dep.md5 | 2 +- .../dir_1633709b037eda5b8477677909f78129_dep.svg | 4 +- .../dir_1e190f7698f059a15072d193afa255c8.js | 7 + .../dir_1e190f7698f059a15072d193afa255c8.xhtml | 150 + .../dir_1e190f7698f059a15072d193afa255c8_dep.map | 9 + .../dir_1e190f7698f059a15072d193afa255c8_dep.md5 | 1 + .../dir_1e190f7698f059a15072d193afa255c8_dep.svg | 71 + .../dir_1fb090f0c6070330bfaccc4236d3ca0d.js | 14 +- .../dir_1fb090f0c6070330bfaccc4236d3ca0d.xhtml | 21 +- .../dir_1fb090f0c6070330bfaccc4236d3ca0d_dep.map | 4 +- .../dir_1fb090f0c6070330bfaccc4236d3ca0d_dep.md5 | 2 +- .../dir_1fb090f0c6070330bfaccc4236d3ca0d_dep.svg | 4 +- .../dir_214608ef36d61442cb2b0c1c4e9a7def.xhtml | 9 +- .../dir_214608ef36d61442cb2b0c1c4e9a7def_dep.map | 2 +- .../dir_214608ef36d61442cb2b0c1c4e9a7def_dep.md5 | 2 +- .../dir_214608ef36d61442cb2b0c1c4e9a7def_dep.svg | 4 +- .../dir_23b0737c0060f5cc12150dca9a230e10.js | 5 + .../dir_23b0737c0060f5cc12150dca9a230e10.xhtml | 146 + .../dir_23b0737c0060f5cc12150dca9a230e10_dep.map | 6 + .../dir_23b0737c0060f5cc12150dca9a230e10_dep.md5 | 1 + .../dir_23b0737c0060f5cc12150dca9a230e10_dep.svg | 45 + .../dir_251c63bc3f70b60acf43edd6d870a3ef.js | 23 + .../dir_251c63bc3f70b60acf43edd6d870a3ef.xhtml | 23 +- .../dir_251c63bc3f70b60acf43edd6d870a3ef_dep.map | 2 +- .../dir_251c63bc3f70b60acf43edd6d870a3ef_dep.md5 | 2 +- .../dir_251c63bc3f70b60acf43edd6d870a3ef_dep.svg | 4 +- .../dir_25885286e9dad4fa105b7b25a8031bbf.js | 3 + .../dir_25885286e9dad4fa105b7b25a8031bbf.xhtml | 15 +- .../dir_2661e87454575451859bd783c869d609.js | 4 + .../dir_2661e87454575451859bd783c869d609.xhtml | 144 + .../dir_2661e87454575451859bd783c869d609_dep.map | 12 + .../dir_2661e87454575451859bd783c869d609_dep.md5 | 1 + 
.../dir_2661e87454575451859bd783c869d609_dep.svg | 97 + .../dir_2c3c4cb85e732569e2614ad40a451d53.js | 24 +- .../dir_2c3c4cb85e732569e2614ad40a451d53.xhtml | 25 +- .../dir_2c3c4cb85e732569e2614ad40a451d53_dep.map | 2 +- .../dir_2c3c4cb85e732569e2614ad40a451d53_dep.md5 | 2 +- .../dir_2c3c4cb85e732569e2614ad40a451d53_dep.svg | 2 +- .../dir_315f6cf1ec0b2df3ae747ff8286a19f5.js | 5 +- .../dir_315f6cf1ec0b2df3ae747ff8286a19f5.xhtml | 15 +- .../dir_315f6cf1ec0b2df3ae747ff8286a19f5_dep.map | 4 +- .../dir_315f6cf1ec0b2df3ae747ff8286a19f5_dep.md5 | 2 +- .../dir_315f6cf1ec0b2df3ae747ff8286a19f5_dep.svg | 6 +- .../dir_345bd7154d1542d1e03fd2836959a19d.js | 14 + .../dir_345bd7154d1542d1e03fd2836959a19d.xhtml | 17 +- .../dir_345bd7154d1542d1e03fd2836959a19d_dep.map | 2 +- .../dir_345bd7154d1542d1e03fd2836959a19d_dep.md5 | 2 +- .../dir_345bd7154d1542d1e03fd2836959a19d_dep.svg | 4 +- .../dir_445738e6fed662d0b65e690e3f1e1de0.js | 7 + .../dir_445738e6fed662d0b65e690e3f1e1de0.xhtml | 146 + .../dir_445738e6fed662d0b65e690e3f1e1de0_dep.map | 6 + .../dir_445738e6fed662d0b65e690e3f1e1de0_dep.md5 | 1 + .../dir_445738e6fed662d0b65e690e3f1e1de0_dep.svg | 45 + .../dir_4da487531dc5bdc76b523c9e82b781b1.js | 13 + .../dir_4da487531dc5bdc76b523c9e82b781b1.xhtml | 158 + .../dir_4da487531dc5bdc76b523c9e82b781b1_dep.map | 15 + .../dir_4da487531dc5bdc76b523c9e82b781b1_dep.md5 | 1 + .../dir_4da487531dc5bdc76b523c9e82b781b1_dep.svg | 123 + .../dir_4f2df8950dc650bf7cf9176fae02facc.js | 35 + .../dir_4f2df8950dc650bf7cf9176fae02facc.xhtml | 179 + .../dir_4f2df8950dc650bf7cf9176fae02facc_dep.map | 31 + .../dir_4f2df8950dc650bf7cf9176fae02facc_dep.md5 | 1 + .../dir_4f2df8950dc650bf7cf9176fae02facc_dep.svg | 264 + .../dir_59425e443f801f1f2fd8bbe4959a3ccf.js | 37 + .../dir_59425e443f801f1f2fd8bbe4959a3ccf.xhtml | 189 + .../dir_59425e443f801f1f2fd8bbe4959a3ccf_dep.map | 22 + .../dir_59425e443f801f1f2fd8bbe4959a3ccf_dep.md5 | 1 + .../dir_59425e443f801f1f2fd8bbe4959a3ccf_dep.svg | 187 + 
.../dir_5df60b4dac2e83e111e155bc5ea540f9.js | 5 + .../dir_5df60b4dac2e83e111e155bc5ea540f9.xhtml | 146 + .../dir_5df60b4dac2e83e111e155bc5ea540f9_dep.map | 11 + .../dir_5df60b4dac2e83e111e155bc5ea540f9_dep.md5 | 1 + .../dir_5df60b4dac2e83e111e155bc5ea540f9_dep.svg | 88 + .../dir_65e289039e1347f87d412b0a1b1a312c.xhtml | 11 +- .../dir_65e289039e1347f87d412b0a1b1a312c_dep.map | 8 +- .../dir_65e289039e1347f87d412b0a1b1a312c_dep.md5 | 2 +- .../dir_65e289039e1347f87d412b0a1b1a312c_dep.svg | 35 +- .../dir_68267d1309a1af8e8297ef4c3efbcdba.xhtml | 9 +- .../dir_7484b841a86f2d7fdd057c9b24fbf230.xhtml | 9 +- .../dir_7484b841a86f2d7fdd057c9b24fbf230_dep.map | 6 +- .../dir_7484b841a86f2d7fdd057c9b24fbf230_dep.md5 | 2 +- .../dir_7484b841a86f2d7fdd057c9b24fbf230_dep.svg | 8 +- .../dir_896191444cf08ec8ade6f88256cb2bdd.js | 7 + .../dir_896191444cf08ec8ade6f88256cb2bdd.xhtml | 146 + .../dir_896191444cf08ec8ade6f88256cb2bdd_dep.map | 6 + .../dir_896191444cf08ec8ade6f88256cb2bdd_dep.md5 | 1 + .../dir_896191444cf08ec8ade6f88256cb2bdd_dep.svg | 45 + .../dir_8bc26130589aa16388b5a02f17abf2c2.js | 18 +- .../dir_8bc26130589aa16388b5a02f17abf2c2.xhtml | 25 +- .../dir_8bc26130589aa16388b5a02f17abf2c2_dep.map | 18 +- .../dir_8bc26130589aa16388b5a02f17abf2c2_dep.md5 | 2 +- .../dir_8bc26130589aa16388b5a02f17abf2c2_dep.svg | 69 +- .../dir_8c278f79c760e5c5fbd911f9870614c1.xhtml | 9 +- .../dir_9b109a3838d873f4037ee21889a49db8.js | 3 + .../dir_9b109a3838d873f4037ee21889a49db8.xhtml | 11 +- .../dir_9b109a3838d873f4037ee21889a49db8_dep.map | 2 +- .../dir_9b109a3838d873f4037ee21889a49db8_dep.md5 | 2 +- .../dir_9b109a3838d873f4037ee21889a49db8_dep.svg | 2 +- .../dir_a133c6c93c82e8721b2f0a6d208afd33.js | 6 + .../dir_a133c6c93c82e8721b2f0a6d208afd33.xhtml | 148 + .../dir_a133c6c93c82e8721b2f0a6d208afd33_dep.map | 20 + .../dir_a133c6c93c82e8721b2f0a6d208afd33_dep.md5 | 1 + .../dir_a133c6c93c82e8721b2f0a6d208afd33_dep.svg | 167 + .../dir_aebb8dcc11953d78e620bbef0b9e2183.xhtml | 9 +- 
.../dir_b496024afd63bafbf4e8da80d7d3a81a.xhtml | 15 +- .../dir_b496024afd63bafbf4e8da80d7d3a81a_dep.map | 4 + .../dir_b496024afd63bafbf4e8da80d7d3a81a_dep.md5 | 1 + .../dir_b496024afd63bafbf4e8da80d7d3a81a_dep.svg | 28 + .../dir_b88a6d23833cc7e200916be8af5e3057.js | 6 + .../dir_b88a6d23833cc7e200916be8af5e3057.xhtml | 144 + .../dir_b88a6d23833cc7e200916be8af5e3057_dep.map | 4 + .../dir_b88a6d23833cc7e200916be8af5e3057_dep.md5 | 1 + .../dir_b88a6d23833cc7e200916be8af5e3057_dep.svg | 28 + .../dir_b986aa98f9b6490bb4fc7918500346fb.js | 9 + .../dir_b986aa98f9b6490bb4fc7918500346fb.xhtml | 15 +- .../dir_b986aa98f9b6490bb4fc7918500346fb_dep.map | 6 +- .../dir_b986aa98f9b6490bb4fc7918500346fb_dep.md5 | 2 +- .../dir_b986aa98f9b6490bb4fc7918500346fb_dep.svg | 12 +- .../dir_ba1dc693ed461bc4c6534d3b34c816d0.js | 9 + .../dir_ba1dc693ed461bc4c6534d3b34c816d0.xhtml | 146 + .../dir_ba1dc693ed461bc4c6534d3b34c816d0_dep.map | 4 + .../dir_ba1dc693ed461bc4c6534d3b34c816d0_dep.md5 | 1 + .../dir_ba1dc693ed461bc4c6534d3b34c816d0_dep.svg | 28 + .../dir_c1aaaab7f10e8303cab52138d50f8521.js | 5 - .../dir_c1aaaab7f10e8303cab52138d50f8521_dep.map | 5 - .../dir_c1aaaab7f10e8303cab52138d50f8521_dep.md5 | 1 - .../dir_cbdb8362360e11eafe2fa3bc74cf0ffd.js | 5 + ... 
=> dir_cbdb8362360e11eafe2fa3bc74cf0ffd.xhtml} | 23 +- .../dir_cbdb8362360e11eafe2fa3bc74cf0ffd_dep.map | 5 + .../dir_cbdb8362360e11eafe2fa3bc74cf0ffd_dep.md5 | 1 + ...> dir_cbdb8362360e11eafe2fa3bc74cf0ffd_dep.svg} | 20 +- .../dir_d28a4824dc47e487b107a5db32ef43c4.js | 1 + .../dir_d28a4824dc47e487b107a5db32ef43c4.xhtml | 13 +- .../dir_d28a4824dc47e487b107a5db32ef43c4_dep.map | 12 +- .../dir_d28a4824dc47e487b107a5db32ef43c4_dep.md5 | 2 +- .../dir_d28a4824dc47e487b107a5db32ef43c4_dep.svg | 56 +- .../dir_e384de53230b0fb8812f7ca98fdb0e65.js | 18 + .../dir_e384de53230b0fb8812f7ca98fdb0e65.xhtml | 152 + .../dir_e384de53230b0fb8812f7ca98fdb0e65_dep.map | 6 + .../dir_e384de53230b0fb8812f7ca98fdb0e65_dep.md5 | 1 + .../dir_e384de53230b0fb8812f7ca98fdb0e65_dep.svg | 45 + .../dir_e7c7b16542faa38cb4655ff1750d3604.js | 63 + .../dir_e7c7b16542faa38cb4655ff1750d3604.xhtml | 187 + .../dir_e7c7b16542faa38cb4655ff1750d3604_dep.map | 23 + .../dir_e7c7b16542faa38cb4655ff1750d3604_dep.md5 | 1 + .../dir_e7c7b16542faa38cb4655ff1750d3604_dep.svg | 193 + .../dir_e92efd873d292d9829f03334575b107c.js | 5 + .../dir_e92efd873d292d9829f03334575b107c.xhtml | 146 + .../dir_e92efd873d292d9829f03334575b107c_dep.map | 11 + .../dir_e92efd873d292d9829f03334575b107c_dep.md5 | 1 + .../dir_e92efd873d292d9829f03334575b107c_dep.svg | 88 + .../dir_ec05701f68bea22653d08da5856c9ffc.js | 14 + .../dir_ec05701f68bea22653d08da5856c9ffc.xhtml | 160 + .../dir_ec05701f68bea22653d08da5856c9ffc_dep.map | 15 + .../dir_ec05701f68bea22653d08da5856c9ffc_dep.md5 | 1 + .../dir_ec05701f68bea22653d08da5856c9ffc_dep.svg | 123 + .../dir_f482a42326943e614a2ec303fd116962.js | 63 + .../dir_f482a42326943e614a2ec303fd116962.xhtml | 178 + .../dir_f482a42326943e614a2ec303fd116962_dep.map | 6 + .../dir_f482a42326943e614a2ec303fd116962_dep.md5 | 1 + .../dir_f482a42326943e614a2ec303fd116962_dep.svg | 45 + .../dir_f7024513cd67abef53e86ee9382ac5ce.js | 11 + .../dir_f7024513cd67abef53e86ee9382ac5ce.xhtml | 154 + 
.../dir_f7024513cd67abef53e86ee9382ac5ce_dep.map | 12 + .../dir_f7024513cd67abef53e86ee9382ac5ce_dep.md5 | 1 + .../dir_f7024513cd67abef53e86ee9382ac5ce_dep.svg | 97 + .../dir_f906bbebc596bc606d589ccb705e6595.js | 22 +- .../dir_f906bbebc596bc606d589ccb705e6595.xhtml | 21 +- documentation/erode_8cl.xhtml | 14 +- documentation/erode_8cl_source.xhtml | 12 +- documentation/fast__corners_8cl.xhtml | 19 +- documentation/fast__corners_8cl_source.xhtml | 12 +- documentation/files.js | 3 +- documentation/files.xhtml | 993 +- documentation/fill__border_8cl.xhtml | 9 +- documentation/fill__border_8cl_source.xhtml | 9 +- documentation/functions.xhtml | 23 +- documentation/functions_0x7e.xhtml | 191 +- documentation/functions_a.xhtml | 115 +- documentation/functions_b.xhtml | 62 +- documentation/functions_c.xhtml | 264 +- documentation/functions_d.xhtml | 56 +- documentation/functions_e.xhtml | 30 +- documentation/functions_enum.xhtml | 15 +- documentation/functions_eval.xhtml | 153 + documentation/functions_f.xhtml | 95 +- documentation/functions_func.xhtml | 95 +- documentation/functions_func_0x7e.xhtml | 191 +- documentation/functions_func_b.xhtml | 53 +- documentation/functions_func_c.xhtml | 258 +- documentation/functions_func_d.xhtml | 43 +- documentation/functions_func_e.xhtml | 29 +- documentation/functions_func_f.xhtml | 89 +- documentation/functions_func_g.xhtml | 71 +- documentation/functions_func_h.xhtml | 18 +- documentation/functions_func_i.xhtml | 97 +- documentation/functions_func_k.xhtml | 17 +- documentation/functions_func_l.xhtml | 42 +- documentation/functions_func_m.xhtml | 30 +- documentation/functions_func_n.xhtml | 129 +- documentation/functions_func_o.xhtml | 184 +- documentation/functions_func_p.xhtml | 63 +- documentation/functions_func_q.xhtml | 12 +- documentation/functions_func_r.xhtml | 62 +- documentation/functions_func_s.xhtml | 173 +- documentation/functions_func_t.xhtml | 61 +- documentation/functions_func_u.xhtml | 23 +- 
documentation/functions_func_v.xhtml | 21 +- documentation/functions_func_w.xhtml | 23 +- documentation/functions_func_x.xhtml | 12 +- documentation/functions_func_y.xhtml | 12 +- documentation/functions_func_z.xhtml | 12 +- documentation/functions_g.xhtml | 72 +- documentation/functions_h.xhtml | 19 +- documentation/functions_i.xhtml | 117 +- documentation/functions_k.xhtml | 17 +- documentation/functions_l.xhtml | 42 +- documentation/functions_m.xhtml | 33 +- documentation/functions_n.xhtml | 140 +- documentation/functions_o.xhtml | 192 +- documentation/functions_p.xhtml | 82 +- documentation/functions_q.xhtml | 12 +- documentation/functions_r.xhtml | 65 +- documentation/functions_rela.xhtml | 159 + documentation/functions_s.xhtml | 214 +- documentation/functions_t.xhtml | 93 +- documentation/functions_type.xhtml | 251 + documentation/functions_u.xhtml | 26 +- documentation/functions_v.xhtml | 23 +- documentation/functions_vars.xhtml | 155 +- documentation/functions_w.xhtml | 28 +- documentation/functions_x.xhtml | 13 +- documentation/functions_y.xhtml | 13 +- documentation/functions_z.xhtml | 12 +- documentation/gaussian__pyramid_8cl.xhtml | 9 +- documentation/gaussian__pyramid_8cl_source.xhtml | 9 +- documentation/gemm_8cl.xhtml | 11 +- documentation/gemm_8cl_source.xhtml | 12 +- documentation/globals.xhtml | 116 +- documentation/globals_a.xhtml | 296 + documentation/globals_b.xhtml | 22 +- documentation/globals_c.xhtml | 15 +- documentation/globals_d.xhtml | 21 +- documentation/globals_defs.xhtml | 62 +- documentation/globals_dup.js | 3 +- documentation/globals_e.xhtml | 12 +- documentation/globals_f.xhtml | 12 +- documentation/globals_func.xhtml | 11 +- documentation/globals_func_b.xhtml | 16 +- documentation/globals_func_c.xhtml | 12 +- documentation/globals_func_d.xhtml | 9 +- documentation/globals_func_e.xhtml | 9 +- documentation/globals_func_f.xhtml | 9 +- documentation/globals_func_g.xhtml | 9 +- documentation/globals_func_h.xhtml | 9 +- 
documentation/globals_func_i.xhtml | 12 +- documentation/globals_func_l.xhtml | 9 +- documentation/globals_func_m.xhtml | 18 +- documentation/globals_func_n.xhtml | 13 +- documentation/globals_func_o.xhtml | 9 +- documentation/globals_func_p.xhtml | 13 +- documentation/globals_func_r.xhtml | 11 +- documentation/globals_func_s.xhtml | 9 +- documentation/globals_func_t.xhtml | 23 +- documentation/globals_func_u.xhtml | 9 +- documentation/globals_func_v.xhtml | 9 +- documentation/globals_func_w.xhtml | 9 +- documentation/globals_func_y.xhtml | 9 +- documentation/globals_g.xhtml | 12 +- documentation/globals_h.xhtml | 12 +- documentation/globals_i.xhtml | 15 +- documentation/globals_k.xhtml | 12 +- documentation/globals_l.xhtml | 12 +- documentation/globals_m.xhtml | 23 +- documentation/globals_n.xhtml | 16 +- documentation/globals_o.xhtml | 12 +- documentation/globals_p.xhtml | 16 +- documentation/globals_r.xhtml | 14 +- documentation/globals_s.xhtml | 31 +- documentation/globals_t.xhtml | 30 +- documentation/globals_type.xhtml | 12 +- documentation/globals_u.xhtml | 12 +- documentation/globals_v.xhtml | 12 +- documentation/globals_vars.xhtml | 34 +- documentation/globals_w.xhtml | 12 +- documentation/globals_x.xhtml | 12 +- documentation/globals_y.xhtml | 12 +- documentation/graph_legend.xhtml | 9 +- documentation/harris__corners_8cl.xhtml | 9 +- documentation/harris__corners_8cl_source.xhtml | 9 +- documentation/helpers_8h.xhtml | 29 +- documentation/helpers_8h_source.xhtml | 9 +- documentation/hierarchy.js | 288 +- documentation/hierarchy.xhtml | 1039 +- documentation/histogram_8cl.xhtml | 9 +- documentation/histogram_8cl_source.xhtml | 9 +- documentation/hog_8cl.xhtml | 136 + documentation/hog_8cl_source.xhtml | 139 + documentation/index.xhtml | 463 +- documentation/inherit_graph_0.map | 2 +- documentation/inherit_graph_0.md5 | 2 +- documentation/inherit_graph_0.svg | 12 +- documentation/inherit_graph_1.map | 2 +- documentation/inherit_graph_1.md5 | 2 +- 
documentation/inherit_graph_1.svg | 12 +- documentation/inherit_graph_10.map | 2 +- documentation/inherit_graph_10.md5 | 2 +- documentation/inherit_graph_10.svg | 19 +- documentation/inherit_graph_100.map | 3 + documentation/inherit_graph_100.md5 | 1 + documentation/inherit_graph_100.svg | 21 + documentation/inherit_graph_101.map | 3 + documentation/inherit_graph_101.md5 | 1 + documentation/inherit_graph_101.svg | 21 + documentation/inherit_graph_102.map | 5 + documentation/inherit_graph_102.md5 | 1 + documentation/inherit_graph_102.svg | 47 + documentation/inherit_graph_103.map | 3 + documentation/inherit_graph_103.md5 | 1 + documentation/inherit_graph_103.svg | 21 + documentation/inherit_graph_104.map | 3 + documentation/inherit_graph_104.md5 | 1 + documentation/inherit_graph_104.svg | 21 + documentation/inherit_graph_105.map | 3 + documentation/inherit_graph_105.md5 | 1 + documentation/inherit_graph_105.svg | 21 + documentation/inherit_graph_106.map | 3 + documentation/inherit_graph_106.md5 | 1 + documentation/inherit_graph_106.svg | 21 + documentation/inherit_graph_107.map | 3 + documentation/inherit_graph_107.md5 | 1 + documentation/inherit_graph_107.svg | 21 + documentation/inherit_graph_108.map | 3 + documentation/inherit_graph_108.md5 | 1 + documentation/inherit_graph_108.svg | 21 + documentation/inherit_graph_109.map | 3 + documentation/inherit_graph_109.md5 | 1 + documentation/inherit_graph_109.svg | 21 + documentation/inherit_graph_11.map | 2 +- documentation/inherit_graph_11.md5 | 2 +- documentation/inherit_graph_11.svg | 20 +- documentation/inherit_graph_110.map | 3 + documentation/inherit_graph_110.md5 | 1 + documentation/inherit_graph_110.svg | 21 + documentation/inherit_graph_111.map | 3 + documentation/inherit_graph_111.md5 | 1 + documentation/inherit_graph_111.svg | 21 + documentation/inherit_graph_112.map | 3 + documentation/inherit_graph_112.md5 | 1 + documentation/inherit_graph_112.svg | 21 + documentation/inherit_graph_113.map | 3 + 
documentation/inherit_graph_113.md5 | 1 + documentation/inherit_graph_113.svg | 21 + documentation/inherit_graph_114.map | 3 + documentation/inherit_graph_114.md5 | 1 + documentation/inherit_graph_114.svg | 21 + documentation/inherit_graph_115.map | 3 + documentation/inherit_graph_115.md5 | 1 + documentation/inherit_graph_115.svg | 21 + documentation/inherit_graph_116.map | 3 + documentation/inherit_graph_116.md5 | 1 + documentation/inherit_graph_116.svg | 21 + documentation/inherit_graph_117.map | 3 + documentation/inherit_graph_117.md5 | 1 + documentation/inherit_graph_117.svg | 21 + documentation/inherit_graph_118.map | 3 + documentation/inherit_graph_118.md5 | 1 + documentation/inherit_graph_118.svg | 21 + documentation/inherit_graph_119.map | 3 + documentation/inherit_graph_119.md5 | 1 + documentation/inherit_graph_119.svg | 21 + documentation/inherit_graph_12.map | 3 +- documentation/inherit_graph_12.md5 | 2 +- documentation/inherit_graph_12.svg | 28 +- documentation/inherit_graph_120.map | 3 + documentation/inherit_graph_120.md5 | 1 + documentation/inherit_graph_120.svg | 21 + documentation/inherit_graph_121.map | 3 + documentation/inherit_graph_121.md5 | 1 + documentation/inherit_graph_121.svg | 21 + documentation/inherit_graph_122.map | 3 + documentation/inherit_graph_122.md5 | 1 + documentation/inherit_graph_122.svg | 21 + documentation/inherit_graph_123.map | 4 + documentation/inherit_graph_123.md5 | 1 + documentation/inherit_graph_123.svg | 34 + documentation/inherit_graph_124.map | 3 + documentation/inherit_graph_124.md5 | 1 + documentation/inherit_graph_124.svg | 21 + documentation/inherit_graph_125.map | 3 + documentation/inherit_graph_125.md5 | 1 + documentation/inherit_graph_125.svg | 21 + documentation/inherit_graph_126.map | 4 + documentation/inherit_graph_126.md5 | 1 + documentation/inherit_graph_126.svg | 34 + documentation/inherit_graph_127.map | 6 + documentation/inherit_graph_127.md5 | 1 + documentation/inherit_graph_127.svg | 60 + 
documentation/inherit_graph_128.map | 3 + documentation/inherit_graph_128.md5 | 1 + documentation/inherit_graph_128.svg | 21 + documentation/inherit_graph_129.map | 3 + documentation/inherit_graph_129.md5 | 1 + documentation/inherit_graph_129.svg | 21 + documentation/inherit_graph_13.map | 4 +- documentation/inherit_graph_13.md5 | 2 +- documentation/inherit_graph_13.svg | 40 +- documentation/inherit_graph_130.map | 3 + documentation/inherit_graph_130.md5 | 1 + documentation/inherit_graph_130.svg | 21 + documentation/inherit_graph_131.map | 3 + documentation/inherit_graph_131.md5 | 1 + documentation/inherit_graph_131.svg | 21 + documentation/inherit_graph_132.map | 3 + documentation/inherit_graph_132.md5 | 1 + documentation/inherit_graph_132.svg | 21 + documentation/inherit_graph_133.map | 3 + documentation/inherit_graph_133.md5 | 1 + documentation/inherit_graph_133.svg | 21 + documentation/inherit_graph_134.map | 3 + documentation/inherit_graph_134.md5 | 1 + documentation/inherit_graph_134.svg | 21 + documentation/inherit_graph_135.map | 3 + documentation/inherit_graph_135.md5 | 1 + documentation/inherit_graph_135.svg | 21 + documentation/inherit_graph_136.map | 4 + documentation/inherit_graph_136.md5 | 1 + documentation/inherit_graph_136.svg | 34 + documentation/inherit_graph_137.map | 3 + documentation/inherit_graph_137.md5 | 1 + documentation/inherit_graph_137.svg | 21 + documentation/inherit_graph_138.map | 3 + documentation/inherit_graph_138.md5 | 1 + documentation/inherit_graph_138.svg | 21 + documentation/inherit_graph_139.map | 3 + documentation/inherit_graph_139.md5 | 1 + documentation/inherit_graph_139.svg | 21 + documentation/inherit_graph_14.map | 2 +- documentation/inherit_graph_14.md5 | 2 +- documentation/inherit_graph_14.svg | 12 +- documentation/inherit_graph_140.map | 3 + documentation/inherit_graph_140.md5 | 1 + documentation/inherit_graph_140.svg | 21 + documentation/inherit_graph_141.map | 3 + documentation/inherit_graph_141.md5 | 1 + 
documentation/inherit_graph_141.svg | 21 + documentation/inherit_graph_15.map | 3 +- documentation/inherit_graph_15.md5 | 2 +- documentation/inherit_graph_15.svg | 28 +- documentation/inherit_graph_16.map | 2 +- documentation/inherit_graph_16.md5 | 2 +- documentation/inherit_graph_16.svg | 12 +- documentation/inherit_graph_17.map | 8 +- documentation/inherit_graph_17.md5 | 2 +- documentation/inherit_graph_17.svg | 92 +- documentation/inherit_graph_18.map | 3 +- documentation/inherit_graph_18.md5 | 2 +- documentation/inherit_graph_18.svg | 25 +- documentation/inherit_graph_19.map | 5 +- documentation/inherit_graph_19.md5 | 2 +- documentation/inherit_graph_19.svg | 53 +- documentation/inherit_graph_2.map | 2 +- documentation/inherit_graph_2.md5 | 2 +- documentation/inherit_graph_2.svg | 12 +- documentation/inherit_graph_20.map | 6 +- documentation/inherit_graph_20.md5 | 2 +- documentation/inherit_graph_20.svg | 66 +- documentation/inherit_graph_21.map | 137 +- documentation/inherit_graph_21.md5 | 2 +- documentation/inherit_graph_21.svg | 1772 +-- documentation/inherit_graph_22.map | 3 +- documentation/inherit_graph_22.md5 | 2 +- documentation/inherit_graph_22.svg | 25 +- documentation/inherit_graph_23.map | 175 +- documentation/inherit_graph_23.md5 | 2 +- documentation/inherit_graph_23.svg | 2290 +--- documentation/inherit_graph_24.map | 5 +- documentation/inherit_graph_24.md5 | 2 +- documentation/inherit_graph_24.svg | 53 +- documentation/inherit_graph_25.map | 4 +- documentation/inherit_graph_25.md5 | 2 +- documentation/inherit_graph_25.svg | 40 +- documentation/inherit_graph_26.map | 2 +- documentation/inherit_graph_26.md5 | 2 +- documentation/inherit_graph_26.svg | 12 +- documentation/inherit_graph_27.map | 3 +- documentation/inherit_graph_27.md5 | 2 +- documentation/inherit_graph_27.svg | 25 +- documentation/inherit_graph_28.map | 5 +- documentation/inherit_graph_28.md5 | 2 +- documentation/inherit_graph_28.svg | 53 +- documentation/inherit_graph_29.map | 4 +- 
documentation/inherit_graph_29.md5 | 2 +- documentation/inherit_graph_29.svg | 40 +- documentation/inherit_graph_3.map | 2 +- documentation/inherit_graph_3.md5 | 2 +- documentation/inherit_graph_3.svg | 12 +- documentation/inherit_graph_30.map | 3 +- documentation/inherit_graph_30.md5 | 2 +- documentation/inherit_graph_30.svg | 25 +- documentation/inherit_graph_31.map | 6 +- documentation/inherit_graph_31.md5 | 2 +- documentation/inherit_graph_31.svg | 32 +- documentation/inherit_graph_32.map | 6 +- documentation/inherit_graph_32.md5 | 2 +- documentation/inherit_graph_32.svg | 50 +- documentation/inherit_graph_33.map | 6 +- documentation/inherit_graph_33.md5 | 2 +- documentation/inherit_graph_33.svg | 30 +- documentation/inherit_graph_34.map | 2 +- documentation/inherit_graph_34.md5 | 2 +- documentation/inherit_graph_34.svg | 12 +- documentation/inherit_graph_35.map | 2 +- documentation/inherit_graph_35.md5 | 2 +- documentation/inherit_graph_35.svg | 12 +- documentation/inherit_graph_36.map | 2 +- documentation/inherit_graph_36.md5 | 2 +- documentation/inherit_graph_36.svg | 12 +- documentation/inherit_graph_37.map | 3 +- documentation/inherit_graph_37.md5 | 2 +- documentation/inherit_graph_37.svg | 25 +- documentation/inherit_graph_38.map | 4 +- documentation/inherit_graph_38.md5 | 2 +- documentation/inherit_graph_38.svg | 40 +- documentation/inherit_graph_39.map | 2 +- documentation/inherit_graph_39.md5 | 2 +- documentation/inherit_graph_39.svg | 12 +- documentation/inherit_graph_4.map | 2 +- documentation/inherit_graph_4.md5 | 2 +- documentation/inherit_graph_4.svg | 12 +- documentation/inherit_graph_40.map | 3 +- documentation/inherit_graph_40.md5 | 2 +- documentation/inherit_graph_40.svg | 28 +- documentation/inherit_graph_41.map | 2 +- documentation/inherit_graph_41.md5 | 2 +- documentation/inherit_graph_41.svg | 15 +- documentation/inherit_graph_42.map | 2 +- documentation/inherit_graph_42.md5 | 2 +- documentation/inherit_graph_42.svg | 12 +- 
documentation/inherit_graph_43.map | 2 +- documentation/inherit_graph_43.md5 | 2 +- documentation/inherit_graph_43.svg | 12 +- documentation/inherit_graph_44.map | 2 +- documentation/inherit_graph_44.md5 | 2 +- documentation/inherit_graph_44.svg | 12 +- documentation/inherit_graph_45.map | 2 +- documentation/inherit_graph_45.md5 | 2 +- documentation/inherit_graph_45.svg | 12 +- documentation/inherit_graph_46.map | 2 +- documentation/inherit_graph_46.md5 | 2 +- documentation/inherit_graph_46.svg | 12 +- documentation/inherit_graph_47.map | 3 +- documentation/inherit_graph_47.md5 | 2 +- documentation/inherit_graph_47.svg | 28 +- documentation/inherit_graph_48.map | 4 +- documentation/inherit_graph_48.md5 | 2 +- documentation/inherit_graph_48.svg | 41 +- documentation/inherit_graph_49.map | 4 +- documentation/inherit_graph_49.md5 | 2 +- documentation/inherit_graph_49.svg | 41 +- documentation/inherit_graph_5.map | 2 +- documentation/inherit_graph_5.md5 | 2 +- documentation/inherit_graph_5.svg | 12 +- documentation/inherit_graph_50.map | 22 +- documentation/inherit_graph_50.md5 | 2 +- documentation/inherit_graph_50.svg | 286 +- documentation/inherit_graph_51.map | 3 +- documentation/inherit_graph_51.md5 | 2 +- documentation/inherit_graph_51.svg | 28 +- documentation/inherit_graph_52.map | 2 +- documentation/inherit_graph_52.md5 | 2 +- documentation/inherit_graph_52.svg | 12 +- documentation/inherit_graph_53.map | 4 +- documentation/inherit_graph_53.md5 | 2 +- documentation/inherit_graph_53.svg | 40 +- documentation/inherit_graph_54.map | 8 +- documentation/inherit_graph_54.md5 | 2 +- documentation/inherit_graph_54.svg | 92 +- documentation/inherit_graph_55.map | 4 + documentation/inherit_graph_55.md5 | 1 + documentation/inherit_graph_55.svg | 34 + documentation/inherit_graph_56.map | 6 + documentation/inherit_graph_56.md5 | 1 + documentation/inherit_graph_56.svg | 60 + documentation/inherit_graph_57.map | 7 + documentation/inherit_graph_57.md5 | 1 + 
documentation/inherit_graph_57.svg | 73 + documentation/inherit_graph_58.map | 153 + documentation/inherit_graph_58.md5 | 1 + documentation/inherit_graph_58.svg | 1977 +++ documentation/inherit_graph_59.map | 6 + documentation/inherit_graph_59.md5 | 1 + documentation/inherit_graph_59.svg | 60 + documentation/inherit_graph_6.map | 17 +- documentation/inherit_graph_6.md5 | 2 +- documentation/inherit_graph_6.svg | 228 +- documentation/inherit_graph_60.map | 189 + documentation/inherit_graph_60.md5 | 1 + documentation/inherit_graph_60.svg | 2472 ++++ documentation/inherit_graph_61.map | 6 + documentation/inherit_graph_61.md5 | 1 + documentation/inherit_graph_61.svg | 60 + documentation/inherit_graph_62.map | 5 + documentation/inherit_graph_62.md5 | 1 + documentation/inherit_graph_62.svg | 47 + documentation/inherit_graph_63.map | 3 + documentation/inherit_graph_63.md5 | 1 + documentation/inherit_graph_63.svg | 21 + documentation/inherit_graph_64.map | 4 + documentation/inherit_graph_64.md5 | 1 + documentation/inherit_graph_64.svg | 34 + documentation/inherit_graph_65.map | 4 + documentation/inherit_graph_65.md5 | 1 + documentation/inherit_graph_65.svg | 34 + documentation/inherit_graph_66.map | 3 + documentation/inherit_graph_66.md5 | 1 + documentation/inherit_graph_66.svg | 21 + documentation/inherit_graph_67.map | 6 + documentation/inherit_graph_67.md5 | 1 + documentation/inherit_graph_67.svg | 60 + documentation/inherit_graph_68.map | 6 + documentation/inherit_graph_68.md5 | 1 + documentation/inherit_graph_68.svg | 60 + documentation/inherit_graph_69.map | 6 + documentation/inherit_graph_69.md5 | 1 + documentation/inherit_graph_69.svg | 60 + documentation/inherit_graph_7.map | 9 +- documentation/inherit_graph_7.md5 | 2 +- documentation/inherit_graph_7.svg | 135 +- documentation/inherit_graph_70.map | 4 + documentation/inherit_graph_70.md5 | 1 + documentation/inherit_graph_70.svg | 34 + documentation/inherit_graph_71.map | 3 + documentation/inherit_graph_71.md5 | 1 + 
documentation/inherit_graph_71.svg | 21 + documentation/inherit_graph_72.map | 3 + documentation/inherit_graph_72.md5 | 1 + documentation/inherit_graph_72.svg | 21 + documentation/inherit_graph_73.map | 3 + documentation/inherit_graph_73.md5 | 1 + documentation/inherit_graph_73.svg | 21 + documentation/inherit_graph_74.map | 5 + documentation/inherit_graph_74.md5 | 1 + documentation/inherit_graph_74.svg | 47 + documentation/inherit_graph_75.map | 3 + documentation/inherit_graph_75.md5 | 1 + documentation/inherit_graph_75.svg | 32 + documentation/inherit_graph_76.map | 4 + documentation/inherit_graph_76.md5 | 1 + documentation/inherit_graph_76.svg | 36 + documentation/inherit_graph_77.map | 3 + documentation/inherit_graph_77.md5 | 1 + documentation/inherit_graph_77.svg | 32 + documentation/inherit_graph_78.map | 3 + documentation/inherit_graph_78.md5 | 1 + documentation/inherit_graph_78.svg | 21 + documentation/inherit_graph_79.map | 32 + documentation/inherit_graph_79.md5 | 1 + documentation/inherit_graph_79.svg | 444 + documentation/inherit_graph_8.map | 2 +- documentation/inherit_graph_8.md5 | 2 +- documentation/inherit_graph_8.svg | 12 +- documentation/inherit_graph_80.map | 6 + documentation/inherit_graph_80.md5 | 1 + documentation/inherit_graph_80.svg | 60 + documentation/inherit_graph_81.map | 8 + documentation/inherit_graph_81.md5 | 1 + documentation/inherit_graph_81.svg | 86 + documentation/inherit_graph_82.map | 5 + documentation/inherit_graph_82.md5 | 1 + documentation/inherit_graph_82.svg | 47 + documentation/inherit_graph_83.map | 5 + documentation/inherit_graph_83.md5 | 1 + documentation/inherit_graph_83.svg | 47 + documentation/inherit_graph_84.map | 3 + documentation/inherit_graph_84.md5 | 1 + documentation/inherit_graph_84.svg | 21 + documentation/inherit_graph_85.map | 3 + documentation/inherit_graph_85.md5 | 1 + documentation/inherit_graph_85.svg | 21 + documentation/inherit_graph_86.map | 3 + documentation/inherit_graph_86.md5 | 1 + 
documentation/inherit_graph_86.svg | 21 + documentation/inherit_graph_87.map | 3 + documentation/inherit_graph_87.md5 | 1 + documentation/inherit_graph_87.svg | 21 + documentation/inherit_graph_88.map | 3 + documentation/inherit_graph_88.md5 | 1 + documentation/inherit_graph_88.svg | 25 + documentation/inherit_graph_89.map | 3 + documentation/inherit_graph_89.md5 | 1 + documentation/inherit_graph_89.svg | 21 + documentation/inherit_graph_9.map | 2 +- documentation/inherit_graph_9.md5 | 2 +- documentation/inherit_graph_9.svg | 12 +- documentation/inherit_graph_90.map | 3 + documentation/inherit_graph_90.md5 | 1 + documentation/inherit_graph_90.svg | 21 + documentation/inherit_graph_91.map | 3 + documentation/inherit_graph_91.md5 | 1 + documentation/inherit_graph_91.svg | 21 + documentation/inherit_graph_92.map | 3 + documentation/inherit_graph_92.md5 | 1 + documentation/inherit_graph_92.svg | 21 + documentation/inherit_graph_93.map | 3 + documentation/inherit_graph_93.md5 | 1 + documentation/inherit_graph_93.svg | 21 + documentation/inherit_graph_94.map | 3 + documentation/inherit_graph_94.md5 | 1 + documentation/inherit_graph_94.svg | 21 + documentation/inherit_graph_95.map | 3 + documentation/inherit_graph_95.md5 | 1 + documentation/inherit_graph_95.svg | 21 + documentation/inherit_graph_96.map | 3 + documentation/inherit_graph_96.md5 | 1 + documentation/inherit_graph_96.svg | 21 + documentation/inherit_graph_97.map | 3 + documentation/inherit_graph_97.md5 | 1 + documentation/inherit_graph_97.svg | 21 + documentation/inherit_graph_98.map | 3 + documentation/inherit_graph_98.md5 | 1 + documentation/inherit_graph_98.svg | 21 + documentation/inherit_graph_99.map | 3 + documentation/inherit_graph_99.md5 | 1 + documentation/inherit_graph_99.svg | 21 + documentation/inherits.xhtml | 293 +- documentation/integral__image_8cl.xhtml | 9 +- documentation/integral__image_8cl_source.xhtml | 9 +- documentation/magnitude__phase_8cl.xhtml | 38 +- 
documentation/magnitude__phase_8cl_source.xhtml | 20 +- documentation/mean__stddev_8cl.xhtml | 9 +- documentation/mean__stddev_8cl_source.xhtml | 9 +- documentation/minmaxloc_8cl.xhtml | 15 +- documentation/minmaxloc_8cl_source.xhtml | 13 +- documentation/model__objects_2_alex_net_8h.xhtml | 156 + .../model__objects_2_alex_net_8h_source.xhtml | 165 + documentation/model__objects_2_le_net5_8h.xhtml | 156 + .../model__objects_2_le_net5_8h_source.xhtml | 157 + documentation/namespacearm__compute.js | 52 +- documentation/namespacearm__compute.xhtml | 12332 +++++++++++++++---- documentation/namespacearm__compute_1_1cpp14.js | 6 + documentation/namespacearm__compute_1_1cpp14.xhtml | 100 +- documentation/namespacearm__compute_1_1detail.js | 4 + .../namespacearm__compute_1_1detail.xhtml | 193 + documentation/namespacearm__compute_1_1test.js | 76 + documentation/namespacearm__compute_1_1test.xhtml | 1276 ++ .../namespacearm__compute_1_1test_1_1benchmark.js | 16 + ...amespacearm__compute_1_1test_1_1benchmark.xhtml | 236 + .../namespacearm__compute_1_1test_1_1cl.js | 4 + .../namespacearm__compute_1_1test_1_1cl.xhtml | 265 + .../namespacearm__compute_1_1test_1_1cpp11.xhtml | 509 + .../namespacearm__compute_1_1test_1_1cpp14.js | 6 + .../namespacearm__compute_1_1test_1_1cpp14.xhtml | 224 + ..._compute_1_1test_1_1fixed__point__arithmetic.js | 6 + ...mpute_1_1test_1_1fixed__point__arithmetic.xhtml | 233 + ..._1test_1_1fixed__point__arithmetic_1_1detail.js | 5 + ...est_1_1fixed__point__arithmetic_1_1detail.xhtml | 1161 ++ ..._1test_1_1fixed__point__arithmetic_1_1traits.js | 12 + ...est_1_1fixed__point__arithmetic_1_1traits.xhtml | 161 + ...espacearm__compute_1_1test_1_1model__objects.js | 5 + ...acearm__compute_1_1test_1_1model__objects.xhtml | 144 + .../namespacearm__compute_1_1test_1_1neon.js | 4 + .../namespacearm__compute_1_1test_1_1neon.xhtml | 270 + ...namespacearm__compute_1_1test_1_1performance.js | 5 + ...espacearm__compute_1_1test_1_1performance.xhtml | 144 + 
.../namespacearm__compute_1_1test_1_1traits.js | 11 + .../namespacearm__compute_1_1test_1_1traits.xhtml | 193 + .../namespacearm__compute_1_1test_1_1validation.js | 11 + ...mespacearm__compute_1_1test_1_1validation.xhtml | 663 + ...pacearm__compute_1_1test_1_1validation_1_1cl.js | 4 + ...earm__compute_1_1test_1_1validation_1_1cl.xhtml | 140 + ...1test_1_1validation_1_1tensor__operations.xhtml | 1752 +++ ...te_1_1test_1_1validation_1_1tensor__visitors.js | 19 + ...1_1test_1_1validation_1_1tensor__visitors.xhtml | 240 + ...earm__compute_1_1test_1_1validation_1_1vx.xhtml | 172 + documentation/namespacearm__compute_1_1traits.js | 7 + .../namespacearm__compute_1_1traits.xhtml | 147 + documentation/namespacearm__compute_1_1utils.js | 4 + ....xhtml => namespacearm__compute_1_1utils.xhtml} | 96 +- documentation/namespaceboost.js | 4 + documentation/namespaceboost.xhtml | 140 + documentation/namespaceboost_1_1unit__test.js | 4 + documentation/namespaceboost_1_1unit__test.xhtml | 140 + .../namespaceboost_1_1unit__test_1_1data.js | 4 + .../namespaceboost_1_1unit__test_1_1data.xhtml | 140 + ...ceboost_1_1unit__test_1_1data_1_1monomorphic.js | 33 + ...oost_1_1unit__test_1_1data_1_1monomorphic.xhtml | 227 + documentation/namespacecl.xhtml | 9 +- documentation/namespacemembers.xhtml | 718 +- documentation/namespacemembers_b.xhtml | 195 + documentation/namespacemembers_c.xhtml | 350 + documentation/namespacemembers_d.xhtml | 240 + documentation/namespacemembers_dup.js | 24 + documentation/namespacemembers_e.xhtml | 255 + documentation/namespacemembers_enum.xhtml | 19 +- documentation/namespacemembers_f.xhtml | 196 + documentation/namespacemembers_func.js | 22 + documentation/namespacemembers_func.xhtml | 378 +- documentation/namespacemembers_func_b.xhtml | 184 + documentation/namespacemembers_func_c.xhtml | 252 + documentation/namespacemembers_func_d.xhtml | 229 + documentation/namespacemembers_func_e.xhtml | 241 + documentation/namespacemembers_func_f.xhtml | 182 + 
documentation/namespacemembers_func_g.xhtml | 190 + documentation/namespacemembers_func_h.xhtml | 166 + documentation/namespacemembers_func_i.xhtml | 184 + documentation/namespacemembers_func_l.xhtml | 169 + documentation/namespacemembers_func_m.xhtml | 182 + documentation/namespacemembers_func_n.xhtml | 172 + documentation/namespacemembers_func_o.xhtml | 207 + documentation/namespacemembers_func_p.xhtml | 199 + documentation/namespacemembers_func_r.xhtml | 187 + documentation/namespacemembers_func_s.xhtml | 310 + documentation/namespacemembers_func_t.xhtml | 178 + documentation/namespacemembers_func_u.xhtml | 166 + documentation/namespacemembers_func_v.xhtml | 412 + documentation/namespacemembers_g.xhtml | 195 + documentation/namespacemembers_h.xhtml | 171 + documentation/namespacemembers_i.xhtml | 288 + documentation/namespacemembers_k.xhtml | 168 + documentation/namespacemembers_l.xhtml | 186 + documentation/namespacemembers_m.xhtml | 196 + documentation/namespacemembers_n.xhtml | 225 + documentation/namespacemembers_o.xhtml | 212 + documentation/namespacemembers_p.xhtml | 216 + documentation/namespacemembers_q.xhtml | 222 + documentation/namespacemembers_r.xhtml | 193 + documentation/namespacemembers_s.xhtml | 321 + documentation/namespacemembers_t.xhtml | 189 + documentation/namespacemembers_type.xhtml | 132 +- documentation/namespacemembers_u.xhtml | 180 + documentation/namespacemembers_v.xhtml | 417 + documentation/namespacemembers_vars.xhtml | 27 +- documentation/namespaces.js | 4 +- documentation/namespaces.xhtml | 38 +- documentation/namespacetest__helpers.js | 4 - documentation/navtreedata.js | 112 +- documentation/navtreeindex0.js | 500 +- documentation/navtreeindex1.js | 500 +- documentation/navtreeindex10.js | 500 +- documentation/navtreeindex11.js | 500 +- documentation/navtreeindex12.js | 500 +- documentation/navtreeindex13.js | 500 +- documentation/navtreeindex14.js | 500 +- documentation/navtreeindex15.js | 403 +- documentation/navtreeindex16.js | 
253 + documentation/navtreeindex17.js | 253 + documentation/navtreeindex18.js | 253 + documentation/navtreeindex19.js | 253 + documentation/navtreeindex2.js | 500 +- documentation/navtreeindex20.js | 253 + documentation/navtreeindex21.js | 253 + documentation/navtreeindex22.js | 253 + documentation/navtreeindex23.js | 91 + documentation/navtreeindex3.js | 500 +- documentation/navtreeindex4.js | 500 +- documentation/navtreeindex5.js | 500 +- documentation/navtreeindex6.js | 500 +- documentation/navtreeindex7.js | 500 +- documentation/navtreeindex8.js | 500 +- documentation/navtreeindex9.js | 500 +- documentation/neon__cnn_8cpp.js | 5 + documentation/neon__cnn_8cpp.xhtml | 252 + documentation/neon__cnn_8cpp_source.xhtml | 171 + documentation/neon__convolution_8cpp.xhtml | 35 +- documentation/neon__convolution_8cpp_source.xhtml | 32 +- documentation/neon__copy__objects_8cpp.xhtml | 33 +- .../neon__copy__objects_8cpp_source.xhtml | 33 +- documentation/neon__scale_8cpp.xhtml | 35 +- documentation/neon__scale_8cpp_source.xhtml | 32 +- .../neoncl__scale__median__gaussian_8cpp.xhtml | 39 +- ...oncl__scale__median__gaussian_8cpp_source.xhtml | 40 +- documentation/non__linear__filter3x3_8cl.xhtml | 27 +- .../non__linear__filter3x3_8cl_source.xhtml | 13 +- documentation/non__linear__filter5x5_8cl.xhtml | 27 +- .../non__linear__filter5x5_8cl_source.xhtml | 13 +- .../non__linear__filter__helpers_8h.xhtml | 33 +- .../non__linear__filter__helpers_8h_source.xhtml | 13 +- documentation/nonmax_8cl.xhtml | 9 +- documentation/nonmax_8cl_source.xhtml | 9 +- documentation/normalization__layer_8cl.js | 2 +- documentation/normalization__layer_8cl.xhtml | 33 +- .../normalization__layer_8cl_source.xhtml | 16 +- documentation/optical__flow__pyramid__lk_8cl.xhtml | 24 +- .../optical__flow__pyramid__lk_8cl_source.xhtml | 12 +- .../{dir_000013_000012.xhtml => pages.xhtml} | 24 +- documentation/pixelwise__mul__float_8cl.xhtml | 9 +- .../pixelwise__mul__float_8cl_source.xhtml | 9 +- 
documentation/pixelwise__mul__int_8cl.xhtml | 9 +- documentation/pixelwise__mul__int_8cl_source.xhtml | 9 +- documentation/pooling__layer_8cl.js | 4 +- documentation/pooling__layer_8cl.xhtml | 118 +- documentation/pooling__layer_8cl_source.xhtml | 26 +- documentation/remap_8cl.xhtml | 9 +- documentation/remap_8cl_source.xhtml | 9 +- documentation/scale_8cl.xhtml | 9 +- documentation/scale_8cl_source.xhtml | 9 +- documentation/scharr__filter_8cl.xhtml | 9 +- documentation/scharr__filter_8cl_source.xhtml | 9 +- documentation/search/all_0.js | 8 +- documentation/search/all_1.js | 97 +- documentation/search/all_10.js | 114 +- documentation/search/all_11.js | 57 +- documentation/search/all_12.js | 160 +- documentation/search/all_13.js | 238 +- documentation/search/all_14.js | 116 +- documentation/search/all_15.js | 61 +- documentation/search/all_16.js | 119 +- documentation/search/all_17.js | 30 +- documentation/search/all_18.js | 23 +- documentation/search/all_19.js | 16 +- documentation/search/all_1a.js | 102 +- documentation/search/all_1b.html | 26 + documentation/search/all_1b.js | 158 + documentation/search/all_2.js | 191 +- documentation/search/all_3.js | 595 +- documentation/search/all_4.js | 714 +- documentation/search/all_5.js | 112 +- documentation/search/all_6.js | 67 +- documentation/search/all_7.js | 83 +- documentation/search/all_8.js | 97 +- documentation/search/all_9.js | 174 +- documentation/search/all_a.js | 229 +- documentation/search/all_b.js | 30 +- documentation/search/all_c.js | 94 +- documentation/search/all_d.js | 508 +- documentation/search/all_e.js | 522 +- documentation/search/all_f.js | 92 +- documentation/search/classes_0.js | 15 +- documentation/search/classes_1.js | 26 +- documentation/search/classes_10.html | 26 + documentation/search/classes_10.js | 10 + documentation/search/classes_11.html | 26 + documentation/search/classes_11.js | 21 + documentation/search/classes_12.html | 26 + documentation/search/classes_12.js | 14 + 
documentation/search/classes_13.html | 26 + documentation/search/classes_13.js | 5 + documentation/search/classes_14.html | 26 + documentation/search/classes_14.js | 6 + documentation/search/classes_15.html | 26 + documentation/search/classes_15.js | 6 + documentation/search/classes_2.js | 166 +- documentation/search/classes_3.js | 203 +- documentation/search/classes_4.js | 18 +- documentation/search/classes_5.js | 36 +- documentation/search/classes_6.js | 12 +- documentation/search/classes_7.js | 18 +- documentation/search/classes_8.js | 5 +- documentation/search/classes_9.js | 237 +- documentation/search/classes_a.js | 10 +- documentation/search/classes_b.js | 13 +- documentation/search/classes_c.js | 8 +- documentation/search/classes_d.js | 176 +- documentation/search/classes_e.js | 3 +- documentation/search/classes_f.js | 34 +- documentation/search/defines_0.js | 25 +- documentation/search/defines_1.js | 58 +- documentation/search/defines_10.html | 26 + documentation/search/defines_10.js | 10 + documentation/search/defines_11.html | 26 + documentation/search/defines_11.js | 10 + documentation/search/defines_2.js | 8 +- documentation/search/defines_3.js | 27 +- documentation/search/defines_4.js | 8 +- documentation/search/defines_5.js | 4 +- documentation/search/defines_6.js | 2 +- documentation/search/defines_7.js | 3 +- documentation/search/defines_8.js | 4 +- documentation/search/defines_9.js | 3 +- documentation/search/defines_a.js | 13 +- documentation/search/defines_b.js | 6 +- documentation/search/defines_c.js | 14 +- documentation/search/defines_d.js | 5 +- documentation/search/defines_e.js | 11 +- documentation/search/defines_f.js | 8 +- documentation/search/enums_4.js | 1 + documentation/search/enums_5.js | 2 +- documentation/search/enums_6.js | 2 +- documentation/search/enums_7.js | 3 +- documentation/search/enums_8.js | 4 +- documentation/search/enums_9.js | 6 +- documentation/search/enums_a.js | 2 +- documentation/search/enums_b.js | 6 +- 
documentation/search/enums_c.html | 26 + documentation/search/enums_c.js | 4 + documentation/search/enums_d.html | 26 + documentation/search/enums_d.js | 6 + documentation/search/enumvalues_0.js | 1 + documentation/search/enumvalues_1.js | 1 + documentation/search/enumvalues_10.js | 11 +- documentation/search/enumvalues_11.js | 5 +- documentation/search/enumvalues_12.html | 26 + documentation/search/enumvalues_12.js | 4 + documentation/search/enumvalues_13.html | 26 + documentation/search/enumvalues_13.js | 6 + documentation/search/enumvalues_2.js | 3 +- documentation/search/enumvalues_4.js | 6 +- documentation/search/enumvalues_5.js | 6 +- documentation/search/enumvalues_6.js | 5 +- documentation/search/enumvalues_7.js | 12 +- documentation/search/enumvalues_8.js | 11 +- documentation/search/enumvalues_9.js | 8 +- documentation/search/enumvalues_a.js | 5 +- documentation/search/enumvalues_b.js | 8 +- documentation/search/enumvalues_c.js | 12 +- documentation/search/enumvalues_d.js | 13 +- documentation/search/enumvalues_e.js | 21 +- documentation/search/enumvalues_f.js | 12 +- documentation/search/files_0.js | 12 +- documentation/search/files_1.js | 25 +- documentation/search/files_10.js | 21 +- documentation/search/files_11.js | 27 +- documentation/search/files_12.js | 9 +- documentation/search/files_13.js | 12 +- documentation/search/files_14.html | 26 + documentation/search/files_14.js | 10 + documentation/search/files_2.js | 160 +- documentation/search/files_3.js | 180 +- documentation/search/files_4.js | 13 +- documentation/search/files_5.js | 5 +- documentation/search/files_6.js | 15 +- documentation/search/files_7.js | 16 +- documentation/search/files_8.js | 41 +- documentation/search/files_9.js | 42 +- documentation/search/files_a.js | 13 +- documentation/search/files_b.js | 148 +- documentation/search/files_c.js | 161 +- documentation/search/files_d.js | 9 +- documentation/search/files_e.js | 23 +- documentation/search/files_f.js | 16 +- 
documentation/search/functions_0.js | 41 +- documentation/search/functions_1.js | 21 +- documentation/search/functions_10.js | 18 +- documentation/search/functions_11.js | 85 +- documentation/search/functions_12.js | 23 +- documentation/search/functions_13.js | 6 +- documentation/search/functions_14.js | 93 +- documentation/search/functions_15.js | 5 +- documentation/search/functions_19.js | 64 +- documentation/search/functions_2.js | 86 +- documentation/search/functions_3.js | 31 +- documentation/search/functions_4.js | 16 +- documentation/search/functions_5.js | 27 +- documentation/search/functions_6.js | 27 +- documentation/search/functions_7.js | 4 +- documentation/search/functions_8.js | 34 +- documentation/search/functions_9.js | 3 +- documentation/search/functions_a.js | 12 +- documentation/search/functions_b.js | 16 +- documentation/search/functions_c.js | 30 +- documentation/search/functions_d.js | 34 +- documentation/search/functions_e.js | 29 +- documentation/search/namespaces_0.js | 22 +- documentation/search/namespaces_1.js | 5 +- documentation/search/namespaces_2.js | 2 +- documentation/search/pages_0.js | 2 +- documentation/search/pages_1.html | 26 + documentation/search/pages_1.js | 4 + documentation/search/pages_2.html | 26 + documentation/search/pages_2.js | 4 + documentation/search/related_0.html | 26 + documentation/search/related_0.js | 5 + documentation/search/related_1.html | 26 + documentation/search/related_1.js | 4 + documentation/search/related_2.html | 26 + documentation/search/related_2.js | 4 + documentation/search/searchdata.js | 29 +- documentation/search/typedefs_0.js | 32 +- documentation/search/typedefs_1.js | 2 +- documentation/search/typedefs_10.html | 26 + documentation/search/typedefs_10.js | 6 + documentation/search/typedefs_11.html | 26 + documentation/search/typedefs_11.js | 5 + documentation/search/typedefs_2.js | 4 +- documentation/search/typedefs_3.js | 32 +- documentation/search/typedefs_4.js | 35 +- 
documentation/search/typedefs_5.js | 3 +- documentation/search/typedefs_6.js | 3 +- documentation/search/typedefs_7.js | 49 +- documentation/search/typedefs_8.js | 3 +- documentation/search/typedefs_9.js | 2 +- documentation/search/typedefs_a.js | 3 +- documentation/search/typedefs_b.js | 18 +- documentation/search/typedefs_c.js | 5 +- documentation/search/typedefs_d.html | 26 + documentation/search/typedefs_d.js | 22 + documentation/search/typedefs_e.html | 26 + documentation/search/typedefs_e.js | 5 + documentation/search/typedefs_f.html | 26 + documentation/search/typedefs_f.js | 6 + documentation/search/variables_0.js | 2 +- documentation/search/variables_1.js | 5 +- documentation/search/variables_10.js | 14 +- documentation/search/variables_11.js | 5 + documentation/search/variables_12.js | 4 +- documentation/search/variables_13.js | 2 +- documentation/search/variables_14.js | 3 +- documentation/search/variables_15.js | 2 +- documentation/search/variables_16.js | 2 +- documentation/search/variables_2.js | 2 + documentation/search/variables_3.js | 3 +- documentation/search/variables_4.js | 1 + documentation/search/variables_5.js | 5 +- documentation/search/variables_6.js | 4 +- documentation/search/variables_7.js | 3 +- documentation/search/variables_8.js | 2 +- documentation/search/variables_9.js | 3 +- documentation/search/variables_a.js | 5 +- documentation/search/variables_c.js | 2 + documentation/search/variables_e.js | 3 + documentation/search/variables_f.js | 3 +- documentation/sobel__filter_8cl.xhtml | 9 +- documentation/sobel__filter_8cl_source.xhtml | 9 +- documentation/softmax__layer_8cl.xhtml | 20 +- documentation/softmax__layer_8cl_source.xhtml | 13 +- documentation/struct_coordinates2_d.xhtml | 9 +- documentation/struct_detection_window.js | 9 + documentation/struct_detection_window.xhtml | 267 + documentation/struct_image.xhtml | 15 +- documentation/struct_internal_keypoint.xhtml | 9 +- documentation/struct_keypoint.xhtml | 9 +- 
documentation/struct_tensor3_d.xhtml | 17 +- documentation/struct_vector.xhtml | 13 +- .../structarm__compute_1_1_border_size.js | 1 + .../structarm__compute_1_1_border_size.xhtml | 143 +- ...uctarm__compute_1_1_c_l_coefficient_table.xhtml | 9 +- ...rm__compute_1_1_c_l_l_k_internal_keypoint.xhtml | 9 +- .../structarm__compute_1_1_c_l_old_value.xhtml | 9 +- .../structarm__compute_1_1_coordinates2_d.xhtml | 19 +- .../structarm__compute_1_1_coordinates3_d.xhtml | 21 +- .../structarm__compute_1_1_detection_window.xhtml | 35 +- .../structarm__compute_1_1_i_o_format_info.xhtml | 49 +- .../structarm__compute_1_1_key_point.xhtml | 29 +- ...rm__compute_1_1_n_e_l_k_internal_keypoint.xhtml | 9 +- .../structarm__compute_1_1_rectangle.xhtml | 23 +- .../structarm__compute_1_1_valid_region.xhtml | 49 +- ...structarm__compute_1_1cpp14_1_1___unique__if.js | 4 + ...uctarm__compute_1_1cpp14_1_1___unique__if.xhtml | 168 + ...compute_1_1cpp14_1_1___unique__if_3_01_t[]_4.js | 4 + ...pute_1_1cpp14_1_1___unique__if_3_01_t[]_4.xhtml | 168 + ...mpute_1_1cpp14_1_1___unique__if_3_01_t[_n]_4.js | 4 + ...te_1_1cpp14_1_1___unique__if_3_01_t[_n]_4.xhtml | 168 + ...tructarm__compute_1_1enable__bitwise__ops.xhtml | 176 + ...ps_3_01arm__compute_1_1_g_p_u_target_01_4.xhtml | 180 + ...tarm__compute_1_1test_1_1_user_configuration.js | 8 + ...m__compute_1_1test_1_1_user_configuration.xhtml | 279 + ...1_1test_1_1_user_configuration__coll__graph.map | 3 + ...1_1test_1_1_user_configuration__coll__graph.md5 | 1 + ...1_1test_1_1_user_configuration__coll__graph.svg | 79 + ...te_1_1test_1_1common__promoted__signed__type.js | 6 + ...1_1test_1_1common__promoted__signed__type.xhtml | 204 + ...m__compute_1_1test_1_1cpp14_1_1___unique__if.js | 4 + ...compute_1_1test_1_1cpp14_1_1___unique__if.xhtml | 173 + ...1_1test_1_1cpp14_1_1___unique__if_3_01_t[]_4.js | 4 + ...test_1_1cpp14_1_1___unique__if_3_01_t[]_4.xhtml | 168 + ...1test_1_1cpp14_1_1___unique__if_3_01_t[_n]_4.js | 4 + 
...st_1_1cpp14_1_1___unique__if_3_01_t[_n]_4.xhtml | 168 + ...t__arithmetic_1_1detail_1_1constant__expr.xhtml | 489 + ..._point__arithmetic_1_1detail_1_1functions.xhtml | 1358 ++ ...d__point__arithmetic_1_1traits_1_1promote.xhtml | 145 + ...metic_1_1traits_1_1promote_3_01int16__t_01_4.js | 4 + ...ic_1_1traits_1_1promote_3_01int16__t_01_4.xhtml | 168 + ...metic_1_1traits_1_1promote_3_01int32__t_01_4.js | 4 + ...ic_1_1traits_1_1promote_3_01int32__t_01_4.xhtml | 168 + ...metic_1_1traits_1_1promote_3_01int64__t_01_4.js | 4 + ...ic_1_1traits_1_1promote_3_01int64__t_01_4.xhtml | 168 + ...hmetic_1_1traits_1_1promote_3_01int8__t_01_4.js | 4 + ...tic_1_1traits_1_1promote_3_01int8__t_01_4.xhtml | 168 + ...etic_1_1traits_1_1promote_3_01uint16__t_01_4.js | 4 + ...c_1_1traits_1_1promote_3_01uint16__t_01_4.xhtml | 168 + ...etic_1_1traits_1_1promote_3_01uint32__t_01_4.js | 4 + ...c_1_1traits_1_1promote_3_01uint32__t_01_4.xhtml | 168 + ...etic_1_1traits_1_1promote_3_01uint64__t_01_4.js | 4 + ...c_1_1traits_1_1promote_3_01uint64__t_01_4.xhtml | 168 + ...metic_1_1traits_1_1promote_3_01uint8__t_01_4.js | 4 + ...ic_1_1traits_1_1promote_3_01uint8__t_01_4.xhtml | 168 + ...rformance_1_1_performance_user_configuration.js | 6 + ...rmance_1_1_performance_user_configuration.xhtml | 254 + ...performance_user_configuration__coll__graph.map | 4 + ...performance_user_configuration__coll__graph.md5 | 1 + ...performance_user_configuration__coll__graph.svg | 92 + ...arm__compute_1_1test_1_1traits_1_1promote.xhtml | 145 + ..._1_1test_1_1traits_1_1promote_3_01float_01_4.js | 4 + ...1test_1_1traits_1_1promote_3_01float_01_4.xhtml | 168 + ...1test_1_1traits_1_1promote_3_01int16__t_01_4.js | 4 + ...st_1_1traits_1_1promote_3_01int16__t_01_4.xhtml | 168 + ...1test_1_1traits_1_1promote_3_01int32__t_01_4.js | 4 + ...st_1_1traits_1_1promote_3_01int32__t_01_4.xhtml | 168 + ..._1test_1_1traits_1_1promote_3_01int8__t_01_4.js | 4 + ...est_1_1traits_1_1promote_3_01int8__t_01_4.xhtml | 168 + 
...test_1_1traits_1_1promote_3_01uint16__t_01_4.js | 4 + ...t_1_1traits_1_1promote_3_01uint16__t_01_4.xhtml | 168 + ...test_1_1traits_1_1promote_3_01uint32__t_01_4.js | 4 + ...t_1_1traits_1_1promote_3_01uint32__t_01_4.xhtml | 168 + ...1test_1_1traits_1_1promote_3_01uint8__t_01_4.js | 4 + ...st_1_1traits_1_1promote_3_01uint8__t_01_4.xhtml | 168 + ..._1_1test_1_1validation_1_1cl_1_1_c_l_fixture.js | 4 + ...1test_1_1validation_1_1cl_1_1_c_l_fixture.xhtml | 180 + ...ompute_1_1test_1_1validation_1_1match__const.js | 4 + ...ute_1_1test_1_1validation_1_1match__const.xhtml | 172 + ...r__visitors_1_1absolute__difference__visitor.js | 4 + ...visitors_1_1absolute__difference__visitor.xhtml | 203 + ...1absolute__difference__visitor__coll__graph.map | 2 + ...1absolute__difference__visitor__coll__graph.md5 | 1 + ...1absolute__difference__visitor__coll__graph.svg | 29 + ...nsor__visitors_1_1activation__layer__visitor.js | 5 + ...r__visitors_1_1activation__layer__visitor.xhtml | 226 + ..._1_1activation__layer__visitor__coll__graph.map | 2 + ..._1_1activation__layer__visitor__coll__graph.md5 | 1 + ..._1_1activation__layer__visitor__coll__graph.svg | 28 + ...r__visitors_1_1arithmetic__addition__visitor.js | 5 + ...visitors_1_1arithmetic__addition__visitor.xhtml | 232 + ...1arithmetic__addition__visitor__coll__graph.map | 2 + ...1arithmetic__addition__visitor__coll__graph.md5 | 1 + ...1arithmetic__addition__visitor__coll__graph.svg | 29 + ...visitors_1_1arithmetic__subtraction__visitor.js | 5 + ...itors_1_1arithmetic__subtraction__visitor.xhtml | 232 + ...ithmetic__subtraction__visitor__coll__graph.map | 2 + ...ithmetic__subtraction__visitor__coll__graph.md5 | 1 + ...ithmetic__subtraction__visitor__coll__graph.svg | 29 + ...tors_1_1batch__normalization__layer__visitor.js | 5 + ...s_1_1batch__normalization__layer__visitor.xhtml | 256 + ..._normalization__layer__visitor__coll__graph.map | 2 + ..._normalization__layer__visitor__coll__graph.md5 | 1 + 
..._normalization__layer__visitor__coll__graph.svg | 29 + ...sor__visitors_1_1convolution__layer__visitor.js | 5 + ...__visitors_1_1convolution__layer__visitor.xhtml | 238 + ...1_1convolution__layer__visitor__coll__graph.map | 2 + ...1_1convolution__layer__visitor__coll__graph.md5 | 1 + ...1_1convolution__layer__visitor__coll__graph.svg | 28 + ...1tensor__visitors_1_1depth__convert__visitor.js | 5 + ...nsor__visitors_1_1depth__convert__visitor.xhtml | 236 + ...ors_1_1depth__convert__visitor__coll__graph.map | 2 + ...ors_1_1depth__convert__visitor__coll__graph.md5 | 1 + ...ors_1_1depth__convert__visitor__coll__graph.svg | 28 + ...visitors_1_1fixed__point__operation__visitor.js | 6 + ...itors_1_1fixed__point__operation__visitor.xhtml | 258 + ...xed__point__operation__visitor__coll__graph.map | 2 + ...xed__point__operation__visitor__coll__graph.md5 | 1 + ...xed__point__operation__visitor__coll__graph.svg | 29 + ..._point__pixel__wise__multiplication__visitor.js | 6 + ...int__pixel__wise__multiplication__visitor.xhtml | 276 + ..._wise__multiplication__visitor__coll__graph.map | 2 + ..._wise__multiplication__visitor__coll__graph.md5 | 1 + ..._wise__multiplication__visitor__coll__graph.svg | 29 + ...visitors_1_1fully__connected__layer__visitor.js | 5 + ...itors_1_1fully__connected__layer__visitor.xhtml | 232 + ...lly__connected__layer__visitor__coll__graph.map | 2 + ...lly__connected__layer__visitor__coll__graph.md5 | 1 + ...lly__connected__layer__visitor__coll__graph.svg | 29 + ...idation_1_1tensor__visitors_1_1gemm__visitor.js | 5 + ...tion_1_1tensor__visitors_1_1gemm__visitor.xhtml | 244 + ...sor__visitors_1_1gemm__visitor__coll__graph.map | 2 + ...sor__visitors_1_1gemm__visitor__coll__graph.md5 | 1 + ...sor__visitors_1_1gemm__visitor__coll__graph.svg | 28 + ...r__visitors_1_1normalization__layer__visitor.js | 5 + ...visitors_1_1normalization__layer__visitor.xhtml | 226 + ...1normalization__layer__visitor__coll__graph.map | 2 + 
...1normalization__layer__visitor__coll__graph.md5 | 1 + ...1normalization__layer__visitor__coll__graph.svg | 29 + ...tors_1_1pixel__wise__multiplication__visitor.js | 5 + ...s_1_1pixel__wise__multiplication__visitor.xhtml | 248 + ..._wise__multiplication__visitor__coll__graph.map | 2 + ..._wise__multiplication__visitor__coll__graph.md5 | 1 + ..._wise__multiplication__visitor__coll__graph.svg | 29 + ...1tensor__visitors_1_1pooling__layer__visitor.js | 5 + ...nsor__visitors_1_1pooling__layer__visitor.xhtml | 232 + ...ors_1_1pooling__layer__visitor__coll__graph.map | 2 + ...ors_1_1pooling__layer__visitor__coll__graph.md5 | 1 + ...ors_1_1pooling__layer__visitor__coll__graph.svg | 28 + ...dation_1_1tensor__visitors_1_1print__visitor.js | 5 + ...ion_1_1tensor__visitors_1_1print__visitor.xhtml | 216 + ...or__visitors_1_1print__visitor__coll__graph.map | 2 + ...or__visitors_1_1print__visitor__coll__graph.md5 | 1 + ...or__visitors_1_1print__visitor__coll__graph.svg | 28 + ...1tensor__visitors_1_1softmax__layer__visitor.js | 5 + ...nsor__visitors_1_1softmax__layer__visitor.xhtml | 216 + ...ors_1_1softmax__layer__visitor__coll__graph.map | 2 + ...ors_1_1softmax__layer__visitor__coll__graph.md5 | 1 + ...ors_1_1softmax__layer__visitor__coll__graph.svg | 28 + ...ctarm__compute_1_1traits_1_1is__contained.xhtml | 149 + ..._1_1tuple_3_01_t_00_01_ts_8_8_8_01_4_01_4.xhtml | 151 + ..._01_t_00_01_ts_8_8_8_01_4_01_4__coll__graph.map | 2 + ..._01_t_00_01_ts_8_8_8_01_4_01_4__coll__graph.md5 | 1 + ..._01_t_00_01_ts_8_8_8_01_4_01_4__coll__graph.svg | 29 + ..._1_1tuple_3_01_u_00_01_ts_8_8_8_01_4_01_4.xhtml | 151 + ..._01_u_00_01_ts_8_8_8_01_4_01_4__coll__graph.map | 4 + ..._01_u_00_01_ts_8_8_8_01_4_01_4__coll__graph.md5 | 1 + ..._01_u_00_01_ts_8_8_8_01_4_01_4__coll__graph.svg | 47 + ...ntained_3_01_t_00_01std_1_1tuple_3_4_01_4.xhtml | 151 + ...1_t_00_01std_1_1tuple_3_4_01_4__coll__graph.map | 2 + ...1_t_00_01std_1_1tuple_3_4_01_4__coll__graph.md5 | 1 + 
...1_t_00_01std_1_1tuple_3_4_01_4__coll__graph.svg | 29 + ...est_1_1_actc083718f2a45800f245b4789496ba62a.map | 2 + ...est_1_1_actc083718f2a45800f245b4789496ba62a.md5 | 1 + ...est_1_1_actc083718f2a45800f245b4789496ba62a.svg | 29 + ...ute_1_1test_1_1_activation_functions_01_4.xhtml | 155 + ...t_1_1_ale08494eac301fe59801c94a1e49323f42.xhtml | 155 + ...est_1_1_ale1c097a9fbdfd984a6c1fe1b85dace92a.map | 2 + ...est_1_1_ale1c097a9fbdfd984a6c1fe1b85dace92a.md5 | 1 + ...est_1_1_ale1c097a9fbdfd984a6c1fe1b85dace92a.svg | 30 + ...t_1_1_ale353bc52507aa43f06dbf95e757937ea3.xhtml | 155 + ...est_1_1_ale3666c604179e8c5e7b449c9783241da0.map | 2 + ...est_1_1_ale3666c604179e8c5e7b449c9783241da0.md5 | 1 + ...est_1_1_ale3666c604179e8c5e7b449c9783241da0.svg | 30 + ...__compute_1_1test_1_1_all_data_types_01_4.xhtml | 155 + ..._1test_1_1_all_data_types_01_4__coll__graph.map | 2 + ..._1test_1_1_all_data_types_01_4__coll__graph.md5 | 1 + ..._1test_1_1_all_data_types_01_4__coll__graph.svg | 29 + ...rm__compute_1_1test_1_1_border_modes_01_4.xhtml | 155 + ..._1_1test_1_1_border_modes_01_4__coll__graph.map | 2 + ..._1_1test_1_1_border_modes_01_4__coll__graph.md5 | 1 + ..._1_1test_1_1_border_modes_01_4__coll__graph.svg | 29 + ...est_1_1_c_n654a9ddc0be25ce9f53741a765f23cfb.map | 2 + ...est_1_1_c_n654a9ddc0be25ce9f53741a765f23cfb.md5 | 1 + ...est_1_1_c_n654a9ddc0be25ce9f53741a765f23cfb.svg | 29 + ...compute_1_1test_1_1_c_n_n_data_types_01_4.xhtml | 155 + ...test_1_1_c_n_n_data_types_01_4__coll__graph.map | 2 + ...test_1_1_c_n_n_data_types_01_4__coll__graph.md5 | 1 + ...test_1_1_c_n_n_data_types_01_4__coll__graph.svg | 29 + ...est_1_1_c_n_n_fixed_point_data_types_01_4.xhtml | 155 + ...e_1_1test_1_1_c_n_n_float_data_types_01_4.xhtml | 155 + ...est_1_1_c_nacf05d9e09b2edcfed642c23987f92f6.map | 2 + ...est_1_1_c_nacf05d9e09b2edcfed642c23987f92f6.md5 | 1 + ...est_1_1_c_nacf05d9e09b2edcfed642c23987f92f6.svg | 29 + ...compute_1_1test_1_1_convert_policies_01_4.xhtml | 155 + 
...test_1_1_convert_policies_01_4__coll__graph.map | 2 + ...test_1_1_convert_policies_01_4__coll__graph.md5 | 1 + ...test_1_1_convert_policies_01_4__coll__graph.svg | 29 + ...est_1_1_dir618d2b8632fb2a14b2b8ad932f29c702.map | 2 + ...est_1_1_dir618d2b8632fb2a14b2b8ad932f29c702.md5 | 1 + ...est_1_1_dir618d2b8632fb2a14b2b8ad932f29c702.svg | 29 + ..._1test_1_1_direct_convolution_shapes_01_4.xhtml | 155 + ...est_1_1_fix894a7ec222bfe52ff9657f209e49bbb3.map | 2 + ...est_1_1_fix894a7ec222bfe52ff9657f209e49bbb3.md5 | 1 + ...est_1_1_fix894a7ec222bfe52ff9657f209e49bbb3.svg | 29 + ...e_1_1test_1_1_fixed_point_data_types_01_4.xhtml | 155 + ...compute_1_1test_1_1_float_data_types_01_4.xhtml | 155 + ...test_1_1_float_data_types_01_4__coll__graph.map | 2 + ...test_1_1_float_data_types_01_4__coll__graph.md5 | 1 + ...test_1_1_float_data_types_01_4__coll__graph.svg | 29 + ...est_1_1_intc1c44e2478649e95c09e2cce657a0700.map | 2 + ...est_1_1_intc1c44e2478649e95c09e2cce657a0700.md5 | 1 + ...est_1_1_intc1c44e2478649e95c09e2cce657a0700.svg | 29 + ...e_1_1test_1_1_interpolation_policies_01_4.xhtml | 155 + ...est_1_1_lar3e88d2c425acaee0299fc505fb789c24.map | 2 + ...est_1_1_lar3e88d2c425acaee0299fc505fb789c24.md5 | 1 + ...est_1_1_lar3e88d2c425acaee0299fc505fb789c24.svg | 30 + ...est_1_1_lar54e71ecc3aab9d0a8e146092477da42b.map | 2 + ...est_1_1_lar54e71ecc3aab9d0a8e146092477da42b.md5 | 1 + ...est_1_1_lar54e71ecc3aab9d0a8e146092477da42b.svg | 29 + ...t_1_1_larb321d1e758b5a8ba2357d45c9fb79214.xhtml | 155 + ...te_1_1test_1_1_large_g_e_m_m_dataset_01_4.xhtml | 155 + ...rm__compute_1_1test_1_1_large_images_01_4.xhtml | 155 + ..._1_1test_1_1_large_images_01_4__coll__graph.map | 2 + ..._1_1test_1_1_large_images_01_4__coll__graph.md5 | 1 + ..._1_1test_1_1_large_images_01_4__coll__graph.svg | 29 + ...rm__compute_1_1test_1_1_large_shapes_01_4.xhtml | 155 + ..._1_1test_1_1_large_shapes_01_4__coll__graph.map | 2 + ..._1_1test_1_1_large_shapes_01_4__coll__graph.md5 | 1 + 
..._1_1test_1_1_large_shapes_01_4__coll__graph.svg | 29 + ...est_1_1_nor40ab218f8dce317d6fb9026633e97dfb.map | 2 + ...est_1_1_nor40ab218f8dce317d6fb9026633e97dfb.md5 | 1 + ...est_1_1_nor40ab218f8dce317d6fb9026633e97dfb.svg | 29 + ...pute_1_1test_1_1_normalization_types_01_4.xhtml | 155 + ...t_1_1_ran0c09af4ee7a64edb9be5e86462d7cfee.xhtml | 155 + ...est_1_1_ran7977c2cddcca626085649dfc89fd0d79.map | 2 + ...est_1_1_ran7977c2cddcca626085649dfc89fd0d79.md5 | 1 + ...est_1_1_ran7977c2cddcca626085649dfc89fd0d79.svg | 29 + ...est_1_1_ranacdaaefe63ce7ff5e3c1fbdb3c2d1461.map | 2 + ...est_1_1_ranacdaaefe63ce7ff5e3c1fbdb3c2d1461.md5 | 1 + ...est_1_1_ranacdaaefe63ce7ff5e3c1fbdb3c2d1461.svg | 30 + ...est_1_1_random_pooling_layer_dataset_01_4.xhtml | 155 + ...est_1_1_rou6d6a54280c694766b800dca4a14ecd03.map | 2 + ...est_1_1_rou6d6a54280c694766b800dca4a14ecd03.md5 | 1 + ...est_1_1_rou6d6a54280c694766b800dca4a14ecd03.svg | 29 + ...ompute_1_1test_1_1_rounding_policies_01_4.xhtml | 155 + ...est_1_1_sige8903bc485cfa5e8edbf1f41e67f7e95.map | 2 + ...est_1_1_sige8903bc485cfa5e8edbf1f41e67f7e95.md5 | 1 + ...est_1_1_sige8903bc485cfa5e8edbf1f41e67f7e95.svg | 29 + ...ompute_1_1test_1_1_signed_data_types_01_4.xhtml | 151 + ...t_1_1_sma37aa36c611469959a5228d982ba942dd.xhtml | 155 + ...est_1_1_sma62b3eab748b476484e57e35656c730cf.map | 2 + ...est_1_1_sma62b3eab748b476484e57e35656c730cf.md5 | 1 + ...est_1_1_sma62b3eab748b476484e57e35656c730cf.svg | 30 + ...est_1_1_sma68a6ec428c610323abd025da83fb53f1.map | 2 + ...est_1_1_sma68a6ec428c610323abd025da83fb53f1.md5 | 1 + ...est_1_1_sma68a6ec428c610323abd025da83fb53f1.svg | 29 + ...t_1_1_sma7e005b651d0eb6b1236ae8f2d63b33e2.xhtml | 155 + ...est_1_1_smafcd58fcc57b0091cc3ea3edcccbccf01.map | 2 + ...est_1_1_smafcd58fcc57b0091cc3ea3edcccbccf01.md5 | 1 + ...est_1_1_smafcd58fcc57b0091cc3ea3edcccbccf01.svg | 30 + ...__compute_1_1test_1_1_small1_d_shape_01_4.xhtml | 155 + ..._1test_1_1_small1_d_shape_01_4__coll__graph.map | 2 + 
..._1test_1_1_small1_d_shape_01_4__coll__graph.md5 | 1 + ..._1test_1_1_small1_d_shape_01_4__coll__graph.svg | 29 + ...te_1_1test_1_1_small_g_e_m_m_dataset_01_4.xhtml | 155 + ...rm__compute_1_1test_1_1_small_images_01_4.xhtml | 155 + ..._1_1test_1_1_small_images_01_4__coll__graph.map | 2 + ..._1_1test_1_1_small_images_01_4__coll__graph.md5 | 1 + ..._1_1test_1_1_small_images_01_4__coll__graph.svg | 29 + ...rm__compute_1_1test_1_1_small_shapes_01_4.xhtml | 155 + ..._1_1test_1_1_small_shapes_01_4__coll__graph.map | 2 + ..._1_1test_1_1_small_shapes_01_4__coll__graph.md5 | 1 + ..._1_1test_1_1_small_shapes_01_4__coll__graph.svg | 29 + ...est_1_1_thr20a06b6ec29decfcd86dcbb31db5a7cc.map | 2 + ...est_1_1_thr20a06b6ec29decfcd86dcbb31db5a7cc.md5 | 1 + ...est_1_1_thr20a06b6ec29decfcd86dcbb31db5a7cc.svg | 29 + ...ompute_1_1test_1_1_threshold_dataset_01_4.xhtml | 155 + ...est_1_1_uns857987c56fcf4ace7a0307984bdb5675.map | 2 + ...est_1_1_uns857987c56fcf4ace7a0307984bdb5675.md5 | 1 + ...est_1_1_uns857987c56fcf4ace7a0307984bdb5675.svg | 29 + ...pute_1_1test_1_1_unsigned_data_types_01_4.xhtml | 155 + documentation/tablelookup_8cl.xhtml | 9 +- documentation/tablelookup_8cl_source.xhtml | 9 +- documentation/test__helpers_2_utils_8h.js | 9 - .../test__helpers_2_utils_8h_source.xhtml | 171 - documentation/tests.xhtml | 190 + documentation/tests_2_types_8h.js | 9 + documentation/tests_2_types_8h.xhtml | 153 + documentation/tests_2_types_8h_source.xhtml | 138 + documentation/tests_2_utils_8h.js | 44 + documentation/tests_2_utils_8h.xhtml | 301 + documentation/tests_2_utils_8h_source.xhtml | 222 + .../tests_2validation_2_fixed_point_8h.js | 52 + .../tests_2validation_2_fixed_point_8h.xhtml | 292 + ...tests_2validation_2_fixed_point_8h_source.xhtml | 219 + documentation/tests_2validation_2_helpers_8h.js | 5 + documentation/tests_2validation_2_helpers_8h.xhtml | 161 + .../tests_2validation_2_helpers_8h_source.xhtml | 141 + documentation/tests_2validation_2_tensor_8h.xhtml | 154 + 
.../tests_2validation_2_tensor_8h_source.xhtml | 157 + .../tests_2validation_2_u_n_i_t_2_utils_8cpp.xhtml | 139 + ...2validation_2_u_n_i_t_2_utils_8cpp_source.xhtml | 148 + documentation/threshold_8cl.xhtml | 12 +- documentation/threshold_8cl_source.xhtml | 12 +- documentation/transpose_8cl.xhtml | 9 +- documentation/transpose_8cl_source.xhtml | 9 +- documentation/types_8h.js | 2 + documentation/types_8h.xhtml | 29 +- documentation/types_8h_source.xhtml | 19 +- documentation/utils_2_utils_8cpp.js | 6 + documentation/utils_2_utils_8cpp.xhtml | 162 + documentation/utils_2_utils_8cpp_source.xhtml | 155 + documentation/utils_2_utils_8h.js | 9 + ...ers_2_utils_8h.xhtml => utils_2_utils_8h.xhtml} | 53 +- documentation/utils_2_utils_8h_source.xhtml | 172 + .../validation_2_c_l_2_bitwise_and_8cpp.xhtml | 152 + ...alidation_2_c_l_2_bitwise_and_8cpp_source.xhtml | 179 + documentation/validation_2_datasets_8h.xhtml | 257 + .../validation_2_datasets_8h_source.xhtml | 151 + ...idation_2_n_e_o_n_2_activation_layer_8cpp.xhtml | 153 + ..._2_n_e_o_n_2_activation_layer_8cpp_source.xhtml | 189 + .../validation_2_n_e_o_n_2_bitwise_and_8cpp.xhtml | 152 + ...ation_2_n_e_o_n_2_bitwise_and_8cpp_source.xhtml | 179 + ...dation_2_n_e_o_n_2_convolution_layer_8cpp.xhtml | 144 + ...2_n_e_o_n_2_convolution_layer_8cpp_source.xhtml | 169 + ...2_n_e_o_n_2_convolution_layer_direct_8cpp.xhtml | 152 + ..._n_2_convolution_layer_direct_8cpp_source.xhtml | 178 + ...on_2_n_e_o_n_2_fully_connected_layer_8cpp.xhtml | 145 + ...e_o_n_2_fully_connected_layer_8cpp_source.xhtml | 172 + .../validation_2_n_e_o_n_2_g_e_m_m_8cpp.xhtml | 152 + ...alidation_2_n_e_o_n_2_g_e_m_m_8cpp_source.xhtml | 175 + ...tion_2_n_e_o_n_2_normalization_layer_8cpp.xhtml | 142 + ...n_e_o_n_2_normalization_layer_8cpp_source.xhtml | 164 + ..._2_n_e_o_n_2_pooling_2_pooling_layer_8cpp.xhtml | 144 + ...o_n_2_pooling_2_pooling_layer_8cpp_source.xhtml | 167 + documentation/validation_2main_8cpp.js | 6 + 
documentation/validation_2main_8cpp.xhtml | 227 + documentation/validation_2main_8cpp_source.xhtml | 153 + documentation/warp__affine_8cl.xhtml | 9 +- documentation/warp__affine_8cl_source.xhtml | 9 +- documentation/warp__helpers_8h.xhtml | 14 +- documentation/warp__helpers_8h_source.xhtml | 12 +- documentation/warp__perspective_8cl.xhtml | 9 +- documentation/warp__perspective_8cl_source.xhtml | 9 +- examples/SConscript | 70 + examples/cl_convolution.cpp | 6 +- examples/cl_events.cpp | 6 +- examples/neon_cnn.cpp | 230 + examples/neon_convolution.cpp | 6 +- examples/neon_copy_objects.cpp | 4 +- examples/neon_scale.cpp | 6 +- examples/neoncl_scale_median_gaussian.cpp | 6 +- opencl-1.2-stubs/SConscript | 7 + opencl-1.2-stubs/sconscript | 8 - sconscript | 382 - src/core/AccessWindowAutoPadding.cpp | 4 +- src/core/AccessWindowStatic.cpp | 16 +- src/core/AccessWindowTranspose.cpp | 2 +- src/core/CL/CLHelpers.cpp | 107 +- src/core/CL/CLKernelLibrary.cpp | 23 +- src/core/CL/ICLHOG.cpp | 47 + src/core/CL/ICLKernel.cpp | 24 +- src/core/CL/ICLMultiHOG.cpp | 38 + src/core/CL/OpenCL.cpp | 40 + src/core/CL/cl_kernels/activation_layer.cl | 12 +- src/core/CL/cl_kernels/batchnormalization_layer.cl | 99 + src/core/CL/cl_kernels/concatenate.cl | 53 + src/core/CL/cl_kernels/convolution_layer.cl | 47 +- src/core/CL/cl_kernels/gemm.cl | 292 +- src/core/CL/cl_kernels/hog.cl | 455 + src/core/CL/cl_kernels/magnitude_phase.cl | 7 +- src/core/CL/cl_kernels/normalization_layer.cl | 14 +- src/core/CL/cl_kernels/pooling_layer.cl | 34 +- src/core/CL/cl_kernels/types.h | 10 + src/core/CL/cl_kernels/warp_helpers.h | 4 +- src/core/CL/kernels/CLActivationLayerKernel.cpp | 12 +- .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 115 + src/core/CL/kernels/CLDepthConcatenateKernel.cpp | 113 + src/core/CL/kernels/CLFillBorderKernel.cpp | 2 + src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 15 +- src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 38 +- src/core/CL/kernels/CLHOGDescriptorKernel.cpp 
| 200 + src/core/CL/kernels/CLHOGDetectorKernel.cpp | 130 + src/core/CL/kernels/CLIntegralImageKernel.cpp | 19 +- .../CLLocallyConnectedMatrixMultiplyKernel.cpp | 116 + src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 8 +- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 10 +- src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 49 +- src/core/CL/kernels/CLTransposeKernel.cpp | 17 +- ...eshapeKernel.cpp => CLWeightsReshapeKernel.cpp} | 89 +- ...PDetectionWindowNonMaximaSuppressionKernel.cpp} | 20 +- .../CPP/kernels/CPPSortEuclideanDistanceKernel.cpp | 4 +- src/core/Error.cpp | 12 + src/core/Helpers.cpp | 44 +- src/core/IAccessWindow.cpp | 10 +- src/core/ITensor.cpp | 4 +- .../NEON/kernels/NEAbsoluteDifferenceKernel.cpp | 14 + src/core/NEON/kernels/NEAccumulateKernel.cpp | 21 + src/core/NEON/kernels/NEActivationLayerKernel.cpp | 125 +- .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 15 + .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 17 +- .../kernels/NEBatchNormalizationLayerKernel.cpp | 187 + src/core/NEON/kernels/NEBitwiseAndKernel.cpp | 10 + src/core/NEON/kernels/NEBitwiseNotKernel.cpp | 9 + src/core/NEON/kernels/NEBitwiseOrKernel.cpp | 10 + src/core/NEON/kernels/NEBitwiseXorKernel.cpp | 10 + src/core/NEON/kernels/NEBox3x3Kernel.cpp | 9 + src/core/NEON/kernels/NECannyEdgeKernel.cpp | 40 +- src/core/NEON/kernels/NEChannelCombineKernel.cpp | 73 +- src/core/NEON/kernels/NEChannelExtractKernel.cpp | 77 +- src/core/NEON/kernels/NECol2ImKernel.cpp | 85 +- src/core/NEON/kernels/NEColorConvertKernel.cpp | 99 +- src/core/NEON/kernels/NEConvolutionKernel.cpp | 40 +- .../NEConvolutionLayerWeightsReshapeKernel.cpp | 120 - .../kernels/NECumulativeDistributionKernel.cpp | 36 +- src/core/NEON/kernels/NEDepthConcatenateKernel.cpp | 105 + src/core/NEON/kernels/NEDepthConvertKernel.cpp | 119 +- ...EDirectConvolutionLayerBiasAccumulateKernel.cpp | 207 + .../kernels/NEDirectConvolutionLayerKernel.cpp | 817 ++ src/core/NEON/kernels/NEFillBorderKernel.cpp | 14 +- 
.../NEON/kernels/NEGEMMInterleave4x4Kernel.cpp | 4 +- .../kernels/NEGEMMLowpMatrixMultiplyKernel.cpp | 374 +- .../kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 63 +- .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 242 +- .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 858 +- src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp | 103 +- src/core/NEON/kernels/NEHOGDescriptorKernel.cpp | 45 +- src/core/NEON/kernels/NEHOGDetectorKernel.cpp | 36 +- src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 223 +- src/core/NEON/kernels/NEIm2ColKernel.cpp | 186 +- src/core/NEON/kernels/NEIntegralImageKernel.cpp | 18 +- .../NELocallyConnectedMatrixMultiplyKernel.cpp | 226 + src/core/NEON/kernels/NEMinMaxLocationKernel.cpp | 10 +- src/core/NEON/kernels/NENonLinearFilterKernel.cpp | 20 +- .../NEON/kernels/NENormalizationLayerKernel.cpp | 128 +- .../kernels/NEPixelWiseMultiplicationKernel.cpp | 79 +- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 213 +- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 385 +- src/core/NEON/kernels/NETransposeKernel.cpp | 17 +- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 175 + src/core/SubTensorInfo.cpp | 78 + src/core/TensorInfo.cpp | 167 +- src/core/Utils.cpp | 32 +- src/core/Validate.cpp | 43 +- src/runtime/CL/CLHOG.cpp | 84 + src/runtime/CL/CLMultiHOG.cpp | 52 + src/runtime/CL/CLScheduler.cpp | 2 +- src/runtime/CL/CLSubTensor.cpp | 81 + .../CL/functions/CLBatchNormalizationLayer.cpp | 48 + src/runtime/CL/functions/CLConvolutionLayer.cpp | 201 +- src/runtime/CL/functions/CLDepthConcatenate.cpp | 71 + src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 224 +- src/runtime/CL/functions/CLHOGDescriptor.cpp | 99 + src/runtime/CL/functions/CLHOGDetector.cpp | 69 + src/runtime/CL/functions/CLHOGGradient.cpp | 75 + src/runtime/CL/functions/CLHOGMultiDetection.cpp | 240 + src/runtime/CL/functions/CLHarrisCorners.cpp | 4 +- .../CL/functions/CLLocallyConnectedLayer.cpp | 131 + src/runtime/CPP/CPPScheduler.cpp | 52 +- 
src/runtime/CPP/SingleThreadScheduler.cpp | 52 + src/runtime/NEON/INESimpleFunction.cpp | 2 +- .../NEON/functions/NEBatchNormalizationLayer.cpp | 49 + src/runtime/NEON/functions/NECannyEdge.cpp | 4 +- src/runtime/NEON/functions/NEConvolution.cpp | 6 +- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 200 +- src/runtime/NEON/functions/NEDepthConcatenate.cpp | 67 + src/runtime/NEON/functions/NEDepthConvert.cpp | 4 +- src/runtime/NEON/functions/NEDerivative.cpp | 2 +- .../NEON/functions/NEDirectConvolutionLayer.cpp | 75 + src/runtime/NEON/functions/NEEqualizeHistogram.cpp | 4 +- src/runtime/NEON/functions/NEFastCorners.cpp | 6 +- src/runtime/NEON/functions/NEFillBorder.cpp | 2 +- .../NEON/functions/NEFullyConnectedLayer.cpp | 233 +- src/runtime/NEON/functions/NEGEMM.cpp | 32 +- src/runtime/NEON/functions/NEGEMMLowp.cpp | 12 +- src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp | 5 - src/runtime/NEON/functions/NEGaussian5x5.cpp | 4 +- src/runtime/NEON/functions/NEGaussianPyramid.cpp | 6 +- src/runtime/NEON/functions/NEHOGDescriptor.cpp | 4 +- src/runtime/NEON/functions/NEHOGDetector.cpp | 4 +- src/runtime/NEON/functions/NEHOGGradient.cpp | 2 +- src/runtime/NEON/functions/NEHOGMultiDetection.cpp | 6 +- src/runtime/NEON/functions/NEHarrisCorners.cpp | 4 +- src/runtime/NEON/functions/NEHistogram.cpp | 2 +- .../NEON/functions/NELocallyConnectedLayer.cpp | 131 + src/runtime/NEON/functions/NEMeanStdDev.cpp | 2 +- src/runtime/NEON/functions/NEMinMaxLocation.cpp | 4 +- .../NEON/functions/NENormalizationLayer.cpp | 8 +- src/runtime/NEON/functions/NEOpticalFlow.cpp | 4 +- src/runtime/NEON/functions/NESobel5x5.cpp | 4 +- src/runtime/NEON/functions/NESobel7x7.cpp | 4 +- src/runtime/NEON/functions/NESoftmaxLayer.cpp | 23 +- src/runtime/OMP/OMPScheduler.cpp | 83 + src/runtime/Scheduler.cpp | 149 + src/runtime/SubTensor.cpp | 57 + src/runtime/Tensor.cpp | 4 +- src/runtime/Utils.cpp | 42 + tests/CL/CLAccessor.h | 136 + tests/CL/Helper.h | 76 + tests/Globals.h | 38 + 
tests/IAccessor.h | 89 + tests/NEON/Helper.h | 77 + tests/NEON/NEAccessor.h | 124 + tests/ProgramOptions.cpp | 88 + tests/ProgramOptions.h | 101 + tests/RawTensor.cpp | 180 + tests/RawTensor.h | 159 + tests/SConscript | 145 + tests/SConscript.orig | 164 + tests/TensorCache.h | 118 + tests/TensorLibrary.cpp | 470 + tests/TensorLibrary.h | 654 + tests/TypePrinter.h | 403 + tests/TypeReader.h | 67 + tests/Types.h | 37 + tests/UserConfiguration.cpp | 55 + tests/UserConfiguration.h | 136 + tests/Utils.h | 672 + tests/benchmark/CL/ActivationLayer.cpp | 212 + tests/benchmark/CL/BitwiseAnd.cpp | 133 + tests/benchmark/CL/ConvolutionLayer.cpp | 277 + tests/benchmark/CL/FullyConnectedLayer.cpp | 116 + tests/benchmark/CL/GEMM.cpp | 492 + tests/benchmark/CL/GEMM.h | 101 + tests/benchmark/CL/NormalizationLayer.cpp | 93 + tests/benchmark/CL/PoolingLayer.cpp | 140 + tests/benchmark/Datasets.h | 79 + tests/benchmark/Instrument.h | 107 + tests/benchmark/NEON/ActivationLayer.cpp | 239 + tests/benchmark/NEON/BitwiseAnd.cpp | 126 + tests/benchmark/NEON/ConvolutionLayer.cpp | 303 + tests/benchmark/NEON/ConvolutionLayerDirect.cpp | 74 + tests/benchmark/NEON/FullyConnectedLayer.cpp | 132 + tests/benchmark/NEON/GEMM.cpp | 709 ++ tests/benchmark/NEON/GEMM.h | 105 + tests/benchmark/NEON/NormalizationLayer.cpp | 111 + tests/benchmark/NEON/PoolingLayer.cpp | 161 + tests/benchmark/PMUCounter.cpp | 144 + tests/benchmark/PMUCounter.h | 71 + tests/benchmark/PerformanceProgramOptions.cpp | 48 + tests/benchmark/PerformanceProgramOptions.h | 45 + tests/benchmark/PerformanceUserConfiguration.cpp | 45 + tests/benchmark/PerformanceUserConfiguration.h | 57 + tests/benchmark/Profiler.cpp | 87 + tests/benchmark/Profiler.h | 76 + tests/benchmark/WallClockTimer.cpp | 56 + tests/benchmark/WallClockTimer.h | 53 + tests/benchmark/common/ActivationLayer.h | 92 + tests/benchmark/common/ConvolutionLayer.h | 107 + tests/benchmark/common/FullyConnectedLayer.h | 108 + tests/benchmark/common/NormalizationLayer.h | 96 
+ tests/benchmark/common/PoolingLayer.h | 95 + tests/benchmark/main.cpp | 96 + tests/benchmark/system_tests/CL/AlexNet.cpp | 87 + tests/benchmark/system_tests/CL/LeNet5.cpp | 82 + tests/benchmark/system_tests/NEON/AlexNet.cpp | 120 + tests/benchmark/system_tests/NEON/LeNet5.cpp | 80 + tests/benchmark/system_tests/common/AlexNet.h | 95 + tests/benchmark/system_tests/common/LeNet5.h | 82 + tests/boost_wrapper.h | 40 + tests/dataset/ActivationFunctionDataset.h | 66 + tests/dataset/ActivationLayerDataset.h | 177 + tests/dataset/BatchNormalizationLayerDataset.h | 90 + tests/dataset/BorderModeDataset.h | 82 + tests/dataset/ConvertPolicyDataset.h | 82 + tests/dataset/ConvolutionLayerDataset.h | 269 + tests/dataset/DataTypeDatasets.h | 193 + tests/dataset/FullyConnectedLayerDataset.h | 155 + tests/dataset/GEMMDataset.h | 204 + tests/dataset/GenericDataset.h | 97 + tests/dataset/ImageDatasets.h | 115 + tests/dataset/InterpolationPolicyDataset.h | 80 + tests/dataset/NormalizationLayerDataset.h | 99 + tests/dataset/NormalizationTypeDataset.h | 80 + tests/dataset/PoolingLayerDataset.h | 158 + tests/dataset/RoundingPolicyDataset.h | 82 + tests/dataset/ShapeDatasets.h | 130 + tests/dataset/ThresholdDataset.h | 95 + tests/model_objects/AlexNet.h | 553 + tests/model_objects/LeNet5.h | 249 + tests/validation/CL/BitwiseAnd.cpp | 218 + tests/validation/CL/CLFixture.cpp | 33 + tests/validation/CL/CLFixture.h | 48 + tests/validation/CL/DepthConvert.cpp | 413 + tests/validation/CL/FillBorder.cpp | 91 + tests/validation/CL/Threshold.cpp | 154 + tests/validation/Datasets.h | 238 + tests/validation/FixedPoint.h | 972 ++ tests/validation/Helpers.h | 123 + tests/validation/NEON/AbsoluteDifference.cpp | 201 + tests/validation/NEON/Accumulate.cpp | 146 + tests/validation/NEON/AccumulateSquared.cpp | 147 + tests/validation/NEON/AccumulateWeighted.cpp | 146 + tests/validation/NEON/ActivationLayer.cpp | 217 + tests/validation/NEON/ArithmeticAddition.cpp | 228 + 
tests/validation/NEON/ArithmeticSubtraction.cpp | 228 + tests/validation/NEON/BatchNormalizationLayer.cpp | 195 + tests/validation/NEON/BitwiseAnd.cpp | 218 + tests/validation/NEON/BitwiseNot.cpp | 142 + tests/validation/NEON/BitwiseOr.cpp | 150 + tests/validation/NEON/BitwiseXor.cpp | 150 + tests/validation/NEON/Box3x3.cpp | 145 + tests/validation/NEON/ConvolutionLayer.cpp | 200 + tests/validation/NEON/ConvolutionLayerDirect.cpp | 219 + tests/validation/NEON/DepthConvert.cpp | 500 + tests/validation/NEON/FillBorder.cpp | 90 + tests/validation/NEON/Fixedpoint/Exp_QS8.cpp | 124 + tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp | 123 + tests/validation/NEON/Fixedpoint/Log_QS8.cpp | 123 + .../validation/NEON/Fixedpoint/Reciprocal_QS8.cpp | 123 + tests/validation/NEON/FullyConnectedLayer.cpp | 221 + tests/validation/NEON/GEMM.cpp | 203 + tests/validation/NEON/IntegralImage.cpp | 145 + tests/validation/NEON/NormalizationLayer.cpp | 152 + tests/validation/NEON/PixelWiseMultiplication.cpp | 428 + tests/validation/NEON/Pooling/PoolingLayer.cpp | 139 + tests/validation/NEON/SoftmaxLayer.cpp | 196 + tests/validation/NEON/Threshold.cpp | 154 + tests/validation/Reference.cpp | 596 + tests/validation/Reference.h | 303 + tests/validation/ReferenceCPP.cpp | 282 + tests/validation/ReferenceCPP.h | 250 + tests/validation/Tensor.h | 111 + tests/validation/TensorFactory.h | 113 + tests/validation/TensorOperations.h | 1370 ++ tests/validation/TensorVisitors.h | 386 + tests/validation/UNIT/FixedPoint.cpp | 162 + tests/validation/UNIT/TensorInfo.cpp | 91 + tests/validation/UNIT/TensorShape.cpp | 70 + tests/validation/UNIT/Utils.cpp | 77 + tests/validation/VX/DepthConvert.cpp | 346 + tests/validation/VX/VXHelpers.h | 65 + tests/validation/Validation.cpp | 359 + tests/validation/Validation.h | 127 + tests/validation/ValidationProgramOptions.cpp | 50 + tests/validation/ValidationProgramOptions.h | 45 + tests/validation/ValidationUserConfiguration.h | 42 + tests/validation/main.cpp | 104 + 
{test_helpers => utils}/Utils.cpp | 14 +- {test_helpers => utils}/Utils.h | 13 +- 4088 files changed, 258915 insertions(+), 32369 deletions(-) create mode 100644 SConscript create mode 100644 arm_compute/core/CL/CLTypes.h create mode 100644 arm_compute/core/CL/ICLHOG.h create mode 100644 arm_compute/core/CL/ICLMultiHOG.h create mode 100644 arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHOGDetectorKernel.h create mode 100644 arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h rename arm_compute/core/CL/kernels/{CLConvolutionLayerWeightsReshapeKernel.h => CLWeightsReshapeKernel.h} (64%) rename arm_compute/core/{NEON/kernels/NEHOGNonMaximaSuppressionKernel.h => CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h} (61%) create mode 100644 arm_compute/core/FixedPoint.h create mode 100644 arm_compute/core/FixedPoint.inl create mode 100644 arm_compute/core/ITensorInfo.h create mode 100644 arm_compute/core/NEON/NEFixedPoint.h create mode 100644 arm_compute/core/NEON/NEFixedPoint.inl create mode 100644 arm_compute/core/NEON/NEMath.inl create mode 100644 arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h rename arm_compute/core/NEON/kernels/{NEConvolutionLayerWeightsReshapeKernel.h => NEWeightsReshapeKernel.h} (65%) create mode 100644 arm_compute/core/SubTensorInfo.h create mode 100644 arm_compute/runtime/CL/CLHOG.h create mode 100644 arm_compute/runtime/CL/CLMultiHOG.h create mode 100644 
arm_compute/runtime/CL/CLSubTensor.h create mode 100644 arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLDepthConcatenate.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGDescriptor.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGDetector.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGGradient.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGMultiDetection.h create mode 100644 arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h create mode 100644 arm_compute/runtime/IScheduler.h create mode 100644 arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEDepthConcatenate.h create mode 100644 arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h create mode 100644 arm_compute/runtime/OMP/OMPScheduler.h create mode 100644 arm_compute/runtime/Scheduler.h create mode 100644 arm_compute/runtime/SingleThreadScheduler.h create mode 100644 arm_compute/runtime/SubTensor.h create mode 100644 arm_compute/runtime/Utils.h create mode 100644 docs/00_introduction.dox create mode 100644 docs/01_library.dox create mode 100644 docs/02_tests.dox delete mode 100644 docs/arm_compute.dox rename documentation/{_utils_8cpp.xhtml => 00__introduction_8dox.xhtml} (82%) rename documentation/{arm__compute_8dox.xhtml => 01__library_8dox.xhtml} (90%) create mode 100644 documentation/02__tests_8dox.xhtml create mode 100644 documentation/_absolute_difference_8cpp.xhtml create mode 100644 documentation/_absolute_difference_8cpp_source.xhtml create mode 100644 documentation/_accumulate_8cpp.xhtml create mode 100644 documentation/_accumulate_8cpp_source.xhtml create mode 100644 documentation/_accumulate_squared_8cpp.xhtml create mode 100644 documentation/_accumulate_squared_8cpp_source.xhtml create mode 100644 
documentation/_accumulate_weighted_8cpp.xhtml create mode 100644 documentation/_accumulate_weighted_8cpp_source.xhtml create mode 100644 documentation/_activation_function_dataset_8h.xhtml create mode 100644 documentation/_activation_function_dataset_8h_source.xhtml create mode 100644 documentation/_activation_layer_8h.xhtml create mode 100644 documentation/_activation_layer_8h_source.xhtml create mode 100644 documentation/_activation_layer_dataset_8h.js create mode 100644 documentation/_activation_layer_dataset_8h.xhtml create mode 100644 documentation/_activation_layer_dataset_8h_source.xhtml create mode 100644 documentation/_arithmetic_addition_8cpp.xhtml create mode 100644 documentation/_arithmetic_addition_8cpp_source.xhtml create mode 100644 documentation/_arithmetic_subtraction_8cpp.xhtml create mode 100644 documentation/_arithmetic_subtraction_8cpp_source.xhtml create mode 100644 documentation/_batch_normalization_layer_8cpp.xhtml create mode 100644 documentation/_batch_normalization_layer_8cpp_source.xhtml create mode 100644 documentation/_batch_normalization_layer_dataset_8h.js create mode 100644 documentation/_batch_normalization_layer_dataset_8h.xhtml create mode 100644 documentation/_batch_normalization_layer_dataset_8h_source.xhtml create mode 100644 documentation/_bitwise_not_8cpp.xhtml create mode 100644 documentation/_bitwise_not_8cpp_source.xhtml create mode 100644 documentation/_bitwise_or_8cpp.xhtml create mode 100644 documentation/_bitwise_or_8cpp_source.xhtml create mode 100644 documentation/_bitwise_xor_8cpp.xhtml create mode 100644 documentation/_bitwise_xor_8cpp_source.xhtml create mode 100644 documentation/_border_mode_dataset_8h.xhtml create mode 100644 documentation/_border_mode_dataset_8h_source.xhtml create mode 100644 documentation/_box3x3_8cpp.xhtml create mode 100644 documentation/_box3x3_8cpp_source.xhtml create mode 100644 documentation/_c_l_2_alex_net_8cpp.js create mode 100644 documentation/_c_l_2_alex_net_8cpp.xhtml create mode 
100644 documentation/_c_l_2_alex_net_8cpp_source.xhtml create mode 100644 documentation/_c_l_2_depth_convert_8cpp.xhtml create mode 100644 documentation/_c_l_2_depth_convert_8cpp_source.xhtml create mode 100644 documentation/_c_l_2_fill_border_8cpp.xhtml create mode 100644 documentation/_c_l_2_fill_border_8cpp_source.xhtml create mode 100644 documentation/_c_l_2_g_e_m_m_8h.xhtml create mode 100644 documentation/_c_l_2_g_e_m_m_8h_source.xhtml create mode 100644 documentation/_c_l_2_helper_8h.js create mode 100644 documentation/_c_l_2_helper_8h.xhtml create mode 100644 documentation/_c_l_2_helper_8h_source.xhtml create mode 100644 documentation/_c_l_2_le_net5_8cpp.js create mode 100644 documentation/_c_l_2_le_net5_8cpp.xhtml create mode 100644 documentation/_c_l_2_le_net5_8cpp_source.xhtml create mode 100644 documentation/_c_l_2_threshold_8cpp.xhtml create mode 100644 documentation/_c_l_2_threshold_8cpp_source.xhtml create mode 100644 documentation/_c_l_accessor_8h.xhtml create mode 100644 documentation/_c_l_accessor_8h_source.xhtml create mode 100644 documentation/_c_l_batch_normalization_layer_8h.xhtml create mode 100644 documentation/_c_l_batch_normalization_layer_8h_source.xhtml create mode 100644 documentation/_c_l_batch_normalization_layer_kernel_8h.xhtml create mode 100644 documentation/_c_l_batch_normalization_layer_kernel_8h_source.xhtml create mode 100644 documentation/_c_l_depth_concatenate_8h.xhtml create mode 100644 documentation/_c_l_depth_concatenate_8h_source.xhtml create mode 100644 documentation/_c_l_depth_concatenate_kernel_8h.xhtml create mode 100644 documentation/_c_l_depth_concatenate_kernel_8h_source.xhtml create mode 100644 documentation/_c_l_fixture_8cpp.js create mode 100644 documentation/_c_l_fixture_8cpp.xhtml create mode 100644 documentation/_c_l_fixture_8cpp_source.xhtml create mode 100644 documentation/_c_l_fixture_8h.xhtml create mode 100644 documentation/_c_l_fixture_8h_source.xhtml create mode 100644 documentation/_c_l_h_o_g_8h.xhtml 
create mode 100644 documentation/_c_l_h_o_g_8h_source.xhtml create mode 100644 documentation/_c_l_h_o_g_descriptor_8h.xhtml create mode 100644 documentation/_c_l_h_o_g_descriptor_8h_source.xhtml create mode 100644 documentation/_c_l_h_o_g_descriptor_kernel_8h.xhtml create mode 100644 documentation/_c_l_h_o_g_descriptor_kernel_8h_source.xhtml create mode 100644 documentation/_c_l_h_o_g_detector_8h.xhtml create mode 100644 documentation/_c_l_h_o_g_detector_8h_source.xhtml create mode 100644 documentation/_c_l_h_o_g_detector_kernel_8h.xhtml create mode 100644 documentation/_c_l_h_o_g_detector_kernel_8h_source.xhtml create mode 100644 documentation/_c_l_h_o_g_gradient_8h.xhtml create mode 100644 documentation/_c_l_h_o_g_gradient_8h_source.xhtml create mode 100644 documentation/_c_l_h_o_g_multi_detection_8h.xhtml create mode 100644 documentation/_c_l_h_o_g_multi_detection_8h_source.xhtml create mode 100644 documentation/_c_l_locally_connected_layer_8h.xhtml create mode 100644 documentation/_c_l_locally_connected_layer_8h_source.xhtml rename documentation/{_c_l_convolution_layer_weights_reshape_kernel_8h.xhtml => _c_l_locally_connected_matrix_multiply_kernel_8h.xhtml} (85%) rename documentation/{_c_l_convolution_layer_weights_reshape_kernel_8h_source.xhtml => _c_l_locally_connected_matrix_multiply_kernel_8h_source.xhtml} (53%) create mode 100644 documentation/_c_l_multi_h_o_g_8h.xhtml create mode 100644 documentation/_c_l_multi_h_o_g_8h_source.xhtml create mode 100644 documentation/_c_l_sub_tensor_8h.xhtml create mode 100644 documentation/_c_l_sub_tensor_8h_source.xhtml create mode 100644 documentation/_c_l_types_8h.js create mode 100644 documentation/_c_l_types_8h.xhtml create mode 100644 documentation/_c_l_types_8h_source.xhtml create mode 100644 documentation/_c_l_weights_reshape_kernel_8h.xhtml create mode 100644 documentation/_c_l_weights_reshape_kernel_8h_source.xhtml rename documentation/{_n_e_h_o_g_non_maxima_suppression_kernel_8h.xhtml => 
_c_p_p_detection_window_non_maxima_suppression_kernel_8h.xhtml} (79%) create mode 100644 documentation/_c_p_p_detection_window_non_maxima_suppression_kernel_8h_source.xhtml create mode 100644 documentation/_convert_policy_dataset_8h.xhtml create mode 100644 documentation/_convert_policy_dataset_8h_source.xhtml create mode 100644 documentation/_convolution_layer_8h.xhtml create mode 100644 documentation/_convolution_layer_8h_source.xhtml create mode 100644 documentation/_convolution_layer_dataset_8h.js create mode 100644 documentation/_convolution_layer_dataset_8h.xhtml create mode 100644 documentation/_convolution_layer_dataset_8h_source.xhtml create mode 100644 documentation/_data_type_datasets_8h.xhtml create mode 100644 documentation/_data_type_datasets_8h_source.xhtml create mode 100644 documentation/_exp___q_s8_8cpp.xhtml create mode 100644 documentation/_exp___q_s8_8cpp_source.xhtml create mode 100644 documentation/_fixed_point_8cpp.xhtml create mode 100644 documentation/_fixed_point_8cpp_source.xhtml create mode 100644 documentation/_fixed_point_8inl.js create mode 100644 documentation/_fixed_point_8inl.xhtml create mode 100644 documentation/_fixed_point_8inl_source.xhtml create mode 100644 documentation/_fully_connected_layer_8h.xhtml create mode 100644 documentation/_fully_connected_layer_8h_source.xhtml create mode 100644 documentation/_fully_connected_layer_dataset_8h.js create mode 100644 documentation/_fully_connected_layer_dataset_8h.xhtml create mode 100644 documentation/_fully_connected_layer_dataset_8h_source.xhtml create mode 100644 documentation/_g_e_m_m_dataset_8h.xhtml create mode 100644 documentation/_g_e_m_m_dataset_8h_source.xhtml create mode 100644 documentation/_generic_dataset_8h.xhtml create mode 100644 documentation/_generic_dataset_8h_source.xhtml create mode 100644 documentation/_globals_8h.xhtml create mode 100644 documentation/_globals_8h_source.xhtml delete mode 100644 documentation/_helpers_8h.js delete mode 100644 
documentation/_helpers_8h.xhtml delete mode 100644 documentation/_helpers_8h_source.xhtml create mode 100644 documentation/_helpers_8inl.js create mode 100644 documentation/_i_accessor_8h.xhtml create mode 100644 documentation/_i_accessor_8h_source.xhtml create mode 100644 documentation/_i_c_l_h_o_g_8h.xhtml create mode 100644 documentation/_i_c_l_h_o_g_8h_source.xhtml create mode 100644 documentation/_i_c_l_multi_h_o_g_8h.xhtml create mode 100644 documentation/_i_c_l_multi_h_o_g_8h_source.xhtml create mode 100644 documentation/_i_scheduler_8h.xhtml create mode 100644 documentation/_i_scheduler_8h_source.xhtml create mode 100644 documentation/_i_tensor_info_8h.xhtml create mode 100644 documentation/_i_tensor_info_8h_source.xhtml create mode 100644 documentation/_image_datasets_8h.xhtml create mode 100644 documentation/_image_datasets_8h_source.xhtml create mode 100644 documentation/_instrument_8h.xhtml create mode 100644 documentation/_instrument_8h_source.xhtml create mode 100644 documentation/_integral_image_8cpp.xhtml create mode 100644 documentation/_integral_image_8cpp_source.xhtml create mode 100644 documentation/_interpolation_policy_dataset_8h.xhtml create mode 100644 documentation/_interpolation_policy_dataset_8h_source.xhtml create mode 100644 documentation/_invsqrt___q_s8_8cpp.xhtml create mode 100644 documentation/_invsqrt___q_s8_8cpp_source.xhtml create mode 100644 documentation/_log___q_s8_8cpp.xhtml create mode 100644 documentation/_log___q_s8_8cpp_source.xhtml create mode 100644 documentation/_n_e_accessor_8h.xhtml create mode 100644 documentation/_n_e_accessor_8h_source.xhtml create mode 100644 documentation/_n_e_batch_normalization_layer_8h.xhtml create mode 100644 documentation/_n_e_batch_normalization_layer_8h_source.xhtml create mode 100644 documentation/_n_e_batch_normalization_layer_kernel_8h.xhtml create mode 100644 documentation/_n_e_batch_normalization_layer_kernel_8h_source.xhtml create mode 100644 
documentation/_n_e_depth_concatenate_8h.xhtml create mode 100644 documentation/_n_e_depth_concatenate_8h_source.xhtml create mode 100644 documentation/_n_e_depth_concatenate_kernel_8h.xhtml create mode 100644 documentation/_n_e_depth_concatenate_kernel_8h_source.xhtml create mode 100644 documentation/_n_e_direct_convolution_layer_8h.xhtml create mode 100644 documentation/_n_e_direct_convolution_layer_8h_source.xhtml create mode 100644 documentation/_n_e_direct_convolution_layer_bias_accumulate_kernel_8h.xhtml create mode 100644 documentation/_n_e_direct_convolution_layer_bias_accumulate_kernel_8h_source.xhtml create mode 100644 documentation/_n_e_direct_convolution_layer_kernel_8h.xhtml create mode 100644 documentation/_n_e_direct_convolution_layer_kernel_8h_source.xhtml create mode 100644 documentation/_n_e_fixed_point_8h.js create mode 100644 documentation/_n_e_fixed_point_8h.xhtml create mode 100644 documentation/_n_e_fixed_point_8h_source.xhtml create mode 100644 documentation/_n_e_fixed_point_8inl.js create mode 100644 documentation/_n_e_fixed_point_8inl.xhtml create mode 100644 documentation/_n_e_fixed_point_8inl_source.xhtml delete mode 100644 documentation/_n_e_h_o_g_non_maxima_suppression_kernel_8h_source.xhtml create mode 100644 documentation/_n_e_locally_connected_layer_8h.xhtml create mode 100644 documentation/_n_e_locally_connected_layer_8h_source.xhtml create mode 100644 documentation/_n_e_locally_connected_matrix_multiply_kernel_8h.xhtml rename documentation/{_n_e_convolution_layer_weights_reshape_kernel_8h_source.xhtml => _n_e_locally_connected_matrix_multiply_kernel_8h_source.xhtml} (54%) create mode 100644 documentation/_n_e_math_8inl.js create mode 100644 documentation/_n_e_math_8inl.xhtml create mode 100644 documentation/_n_e_math_8inl_source.xhtml create mode 100644 documentation/_n_e_o_n_2_alex_net_8cpp.js create mode 100644 documentation/_n_e_o_n_2_alex_net_8cpp.xhtml create mode 100644 documentation/_n_e_o_n_2_alex_net_8cpp_source.xhtml 
create mode 100644 documentation/_n_e_o_n_2_depth_convert_8cpp.xhtml create mode 100644 documentation/_n_e_o_n_2_depth_convert_8cpp_source.xhtml create mode 100644 documentation/_n_e_o_n_2_fill_border_8cpp.xhtml create mode 100644 documentation/_n_e_o_n_2_fill_border_8cpp_source.xhtml create mode 100644 documentation/_n_e_o_n_2_g_e_m_m_8h.xhtml create mode 100644 documentation/_n_e_o_n_2_g_e_m_m_8h_source.xhtml create mode 100644 documentation/_n_e_o_n_2_helper_8h.js create mode 100644 documentation/_n_e_o_n_2_helper_8h.xhtml create mode 100644 documentation/_n_e_o_n_2_helper_8h_source.xhtml create mode 100644 documentation/_n_e_o_n_2_le_net5_8cpp.js create mode 100644 documentation/_n_e_o_n_2_le_net5_8cpp.xhtml create mode 100644 documentation/_n_e_o_n_2_le_net5_8cpp_source.xhtml create mode 100644 documentation/_n_e_o_n_2_threshold_8cpp.xhtml create mode 100644 documentation/_n_e_o_n_2_threshold_8cpp_source.xhtml rename documentation/{_n_e_convolution_layer_weights_reshape_kernel_8h.xhtml => _n_e_weights_reshape_kernel_8h.xhtml} (85%) create mode 100644 documentation/_n_e_weights_reshape_kernel_8h_source.xhtml create mode 100644 documentation/_normalization_layer_8h.xhtml create mode 100644 documentation/_normalization_layer_8h_source.xhtml create mode 100644 documentation/_normalization_layer_dataset_8h.js create mode 100644 documentation/_normalization_layer_dataset_8h.xhtml create mode 100644 documentation/_normalization_layer_dataset_8h_source.xhtml create mode 100644 documentation/_normalization_type_dataset_8h.xhtml create mode 100644 documentation/_normalization_type_dataset_8h_source.xhtml create mode 100644 documentation/_o_m_p_scheduler_8h.xhtml create mode 100644 documentation/_o_m_p_scheduler_8h_source.xhtml create mode 100644 documentation/_p_m_u_counter_8cpp.js create mode 100644 documentation/_p_m_u_counter_8cpp.xhtml create mode 100644 documentation/_p_m_u_counter_8cpp_source.xhtml create mode 100644 documentation/_p_m_u_counter_8h.xhtml create 
mode 100644 documentation/_p_m_u_counter_8h_source.xhtml create mode 100644 documentation/_performance_program_options_8cpp.xhtml create mode 100644 documentation/_performance_program_options_8cpp_source.xhtml create mode 100644 documentation/_performance_program_options_8h.xhtml create mode 100644 documentation/_performance_program_options_8h_source.xhtml create mode 100644 documentation/_performance_user_configuration_8cpp.xhtml create mode 100644 documentation/_performance_user_configuration_8cpp_source.xhtml create mode 100644 documentation/_performance_user_configuration_8h.xhtml create mode 100644 documentation/_performance_user_configuration_8h_source.xhtml create mode 100644 documentation/_pixel_wise_multiplication_8cpp.xhtml create mode 100644 documentation/_pixel_wise_multiplication_8cpp_source.xhtml create mode 100644 documentation/_pooling_layer_8h.xhtml create mode 100644 documentation/_pooling_layer_8h_source.xhtml create mode 100644 documentation/_pooling_layer_dataset_8h.js create mode 100644 documentation/_pooling_layer_dataset_8h.xhtml create mode 100644 documentation/_pooling_layer_dataset_8h_source.xhtml create mode 100644 documentation/_profiler_8cpp.xhtml create mode 100644 documentation/_profiler_8cpp_source.xhtml create mode 100644 documentation/_profiler_8h.xhtml create mode 100644 documentation/_profiler_8h_source.xhtml create mode 100644 documentation/_program_options_8cpp.xhtml create mode 100644 documentation/_program_options_8cpp_source.xhtml create mode 100644 documentation/_program_options_8h.xhtml create mode 100644 documentation/_program_options_8h_source.xhtml create mode 100644 documentation/_raw_tensor_8cpp.js create mode 100644 documentation/_raw_tensor_8cpp.xhtml create mode 100644 documentation/_raw_tensor_8cpp_source.xhtml create mode 100644 documentation/_raw_tensor_8h.xhtml create mode 100644 documentation/_raw_tensor_8h_source.xhtml create mode 100644 documentation/_reciprocal___q_s8_8cpp.xhtml create mode 100644 
documentation/_reciprocal___q_s8_8cpp_source.xhtml create mode 100644 documentation/_reference_8cpp.xhtml create mode 100644 documentation/_reference_8cpp_source.xhtml create mode 100644 documentation/_reference_8h.xhtml create mode 100644 documentation/_reference_8h_source.xhtml create mode 100644 documentation/_reference_c_p_p_8cpp.xhtml create mode 100644 documentation/_reference_c_p_p_8cpp_source.xhtml create mode 100644 documentation/_reference_c_p_p_8h.xhtml create mode 100644 documentation/_reference_c_p_p_8h_source.xhtml create mode 100644 documentation/_rounding_policy_dataset_8h.xhtml create mode 100644 documentation/_rounding_policy_dataset_8h_source.xhtml create mode 100644 documentation/_scheduler_8h.xhtml create mode 100644 documentation/_scheduler_8h_source.xhtml create mode 100644 documentation/_shape_datasets_8h.xhtml create mode 100644 documentation/_shape_datasets_8h_source.xhtml create mode 100644 documentation/_single_thread_scheduler_8h.xhtml create mode 100644 documentation/_single_thread_scheduler_8h_source.xhtml create mode 100644 documentation/_softmax_layer_8cpp.xhtml create mode 100644 documentation/_softmax_layer_8cpp_source.xhtml create mode 100644 documentation/_sub_tensor_8h.xhtml create mode 100644 documentation/_sub_tensor_8h_source.xhtml create mode 100644 documentation/_sub_tensor_info_8h.xhtml create mode 100644 documentation/_sub_tensor_info_8h_source.xhtml delete mode 100644 documentation/_tensor_8h.js delete mode 100644 documentation/_tensor_8h_source.xhtml create mode 100644 documentation/_tensor_cache_8h.xhtml create mode 100644 documentation/_tensor_cache_8h_source.xhtml create mode 100644 documentation/_tensor_factory_8h.js create mode 100644 documentation/_tensor_factory_8h.xhtml create mode 100644 documentation/_tensor_factory_8h_source.xhtml create mode 100644 documentation/_tensor_info_8cpp.xhtml create mode 100644 documentation/_tensor_info_8cpp_source.xhtml create mode 100644 documentation/_tensor_library_8cpp.xhtml 
create mode 100644 documentation/_tensor_library_8cpp_source.xhtml create mode 100644 documentation/_tensor_library_8h.xhtml create mode 100644 documentation/_tensor_library_8h_source.xhtml create mode 100644 documentation/_tensor_operations_8h.js create mode 100644 documentation/_tensor_operations_8h.xhtml create mode 100644 documentation/_tensor_operations_8h_source.xhtml create mode 100644 documentation/_tensor_shape_8cpp.xhtml create mode 100644 documentation/_tensor_shape_8cpp_source.xhtml create mode 100644 documentation/_tensor_visitors_8h.js create mode 100644 documentation/_tensor_visitors_8h.xhtml create mode 100644 documentation/_tensor_visitors_8h_source.xhtml create mode 100644 documentation/_threshold_dataset_8h.xhtml create mode 100644 documentation/_threshold_dataset_8h_source.xhtml create mode 100644 documentation/_type_printer_8h.js create mode 100644 documentation/_type_printer_8h.xhtml create mode 100644 documentation/_type_printer_8h_source.xhtml create mode 100644 documentation/_type_reader_8h.js create mode 100644 documentation/_type_reader_8h.xhtml create mode 100644 documentation/_type_reader_8h_source.xhtml delete mode 100644 documentation/_types_8h.js delete mode 100644 documentation/_types_8h_source.xhtml create mode 100644 documentation/_user_configuration_8cpp.xhtml create mode 100644 documentation/_user_configuration_8cpp_source.xhtml create mode 100644 documentation/_user_configuration_8h.xhtml create mode 100644 documentation/_user_configuration_8h_source.xhtml delete mode 100644 documentation/_utils_8cpp_source.xhtml create mode 100644 documentation/_v_x_2_depth_convert_8cpp.xhtml create mode 100644 documentation/_v_x_2_depth_convert_8cpp_source.xhtml create mode 100644 documentation/_v_x_helpers_8h.js create mode 100644 documentation/_v_x_helpers_8h.xhtml create mode 100644 documentation/_v_x_helpers_8h_source.xhtml create mode 100644 documentation/_validation_8cpp.js create mode 100644 documentation/_validation_8cpp.xhtml create 
mode 100644 documentation/_validation_8cpp_source.xhtml create mode 100644 documentation/_validation_8h.js create mode 100644 documentation/_validation_8h.xhtml create mode 100644 documentation/_validation_8h_source.xhtml create mode 100644 documentation/_validation_program_options_8cpp.xhtml create mode 100644 documentation/_validation_program_options_8cpp_source.xhtml create mode 100644 documentation/_validation_program_options_8h.xhtml create mode 100644 documentation/_validation_program_options_8h_source.xhtml create mode 100644 documentation/_validation_user_configuration_8h.js create mode 100644 documentation/_validation_user_configuration_8h.xhtml create mode 100644 documentation/_validation_user_configuration_8h_source.xhtml create mode 100644 documentation/_wall_clock_timer_8cpp.xhtml create mode 100644 documentation/_wall_clock_timer_8cpp_source.xhtml create mode 100644 documentation/_wall_clock_timer_8h.xhtml create mode 100644 documentation/_wall_clock_timer_8h_source.xhtml create mode 100644 documentation/architecture.xhtml create mode 100644 documentation/arm__compute_2core_2_fixed_point_8h.js create mode 100644 documentation/arm__compute_2core_2_fixed_point_8h.xhtml create mode 100644 documentation/arm__compute_2core_2_fixed_point_8h_source.xhtml create mode 100644 documentation/arm__compute_2core_2_helpers_8h.js create mode 100644 documentation/arm__compute_2core_2_helpers_8h.xhtml create mode 100644 documentation/arm__compute_2core_2_helpers_8h_source.xhtml create mode 100644 documentation/arm__compute_2core_2_types_8h.js rename documentation/{_types_8h.xhtml => arm__compute_2core_2_types_8h.xhtml} (94%) create mode 100644 documentation/arm__compute_2core_2_types_8h_source.xhtml create mode 100644 documentation/arm__compute_2runtime_2_tensor_8h.js rename documentation/{_tensor_8h.xhtml => arm__compute_2runtime_2_tensor_8h.xhtml} (91%) create mode 100644 documentation/arm__compute_2runtime_2_tensor_8h_source.xhtml create mode 100644 
documentation/arm__compute_2runtime_2_utils_8h.js create mode 100644 documentation/arm__compute_2runtime_2_utils_8h.xhtml create mode 100644 documentation/arm__compute_2runtime_2_utils_8h_source.xhtml create mode 100644 documentation/batchnormalization__layer_8cl.js create mode 100644 documentation/batchnormalization__layer_8cl.xhtml create mode 100644 documentation/batchnormalization__layer_8cl_source.xhtml create mode 100644 documentation/benchmark_2_c_l_2_activation_layer_8cpp.js create mode 100644 documentation/benchmark_2_c_l_2_activation_layer_8cpp.xhtml create mode 100644 documentation/benchmark_2_c_l_2_activation_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_c_l_2_bitwise_and_8cpp.js create mode 100644 documentation/benchmark_2_c_l_2_bitwise_and_8cpp.xhtml create mode 100644 documentation/benchmark_2_c_l_2_bitwise_and_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_c_l_2_convolution_layer_8cpp.js create mode 100644 documentation/benchmark_2_c_l_2_convolution_layer_8cpp.xhtml create mode 100644 documentation/benchmark_2_c_l_2_convolution_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_c_l_2_fully_connected_layer_8cpp.js create mode 100644 documentation/benchmark_2_c_l_2_fully_connected_layer_8cpp.xhtml create mode 100644 documentation/benchmark_2_c_l_2_fully_connected_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_c_l_2_g_e_m_m_8cpp.js create mode 100644 documentation/benchmark_2_c_l_2_g_e_m_m_8cpp.xhtml create mode 100644 documentation/benchmark_2_c_l_2_g_e_m_m_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_c_l_2_normalization_layer_8cpp.js create mode 100644 documentation/benchmark_2_c_l_2_normalization_layer_8cpp.xhtml create mode 100644 documentation/benchmark_2_c_l_2_normalization_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_c_l_2_pooling_layer_8cpp.js create mode 100644 documentation/benchmark_2_c_l_2_pooling_layer_8cpp.xhtml create 
mode 100644 documentation/benchmark_2_c_l_2_pooling_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_datasets_8h.js create mode 100644 documentation/benchmark_2_datasets_8h.xhtml create mode 100644 documentation/benchmark_2_datasets_8h_source.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_activation_layer_8cpp.js create mode 100644 documentation/benchmark_2_n_e_o_n_2_activation_layer_8cpp.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_activation_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_bitwise_and_8cpp.js create mode 100644 documentation/benchmark_2_n_e_o_n_2_bitwise_and_8cpp.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_bitwise_and_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_convolution_layer_8cpp.js create mode 100644 documentation/benchmark_2_n_e_o_n_2_convolution_layer_8cpp.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_convolution_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_convolution_layer_direct_8cpp.js create mode 100644 documentation/benchmark_2_n_e_o_n_2_convolution_layer_direct_8cpp.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_convolution_layer_direct_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_fully_connected_layer_8cpp.js create mode 100644 documentation/benchmark_2_n_e_o_n_2_fully_connected_layer_8cpp.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_fully_connected_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_g_e_m_m_8cpp.js create mode 100644 documentation/benchmark_2_n_e_o_n_2_g_e_m_m_8cpp.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_g_e_m_m_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_normalization_layer_8cpp.js create mode 100644 documentation/benchmark_2_n_e_o_n_2_normalization_layer_8cpp.xhtml create mode 100644 
documentation/benchmark_2_n_e_o_n_2_normalization_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_pooling_layer_8cpp.js create mode 100644 documentation/benchmark_2_n_e_o_n_2_pooling_layer_8cpp.xhtml create mode 100644 documentation/benchmark_2_n_e_o_n_2_pooling_layer_8cpp_source.xhtml create mode 100644 documentation/benchmark_2main_8cpp.js create mode 100644 documentation/benchmark_2main_8cpp.xhtml create mode 100644 documentation/benchmark_2main_8cpp_source.xhtml create mode 100644 documentation/benchmark_2system__tests_2common_2_alex_net_8h.xhtml create mode 100644 documentation/benchmark_2system__tests_2common_2_alex_net_8h_source.xhtml create mode 100644 documentation/benchmark_2system__tests_2common_2_le_net5_8h.xhtml create mode 100644 documentation/benchmark_2system__tests_2common_2_le_net5_8h_source.xhtml create mode 100644 documentation/boost__wrapper_8h.xhtml create mode 100644 documentation/boost__wrapper_8h_source.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer.js create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer_kernel.js create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_batch_normalization_layer_kernel__coll__graph.svg create mode 
100644 documentation/classarm__compute_1_1_c_l_convolution_layer_reshape_weights.js create mode 100644 documentation/classarm__compute_1_1_c_l_convolution_layer_reshape_weights.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_convolution_layer_reshape_weights__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_convolution_layer_reshape_weights__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_convolution_layer_reshape_weights__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate.js create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate_kernel.js create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_depth_concatenate_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_fully_connected_layer_reshape_weights.js create mode 100644 documentation/classarm__compute_1_1_c_l_fully_connected_layer_reshape_weights.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_fully_connected_layer_reshape_weights__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_fully_connected_layer_reshape_weights__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_fully_connected_layer_reshape_weights__coll__graph.svg create mode 
100644 documentation/classarm__compute_1_1_c_l_h_o_g.js create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_block_normalization_kernel.js create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_block_normalization_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_block_normalization_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_block_normalization_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_block_normalization_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_descriptor.js create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_descriptor.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_descriptor__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_descriptor__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_descriptor__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_detector.js create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_detector.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_detector__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_detector__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_detector__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_detector_kernel.js create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_detector_kernel.xhtml create mode 100644 
documentation/classarm__compute_1_1_c_l_h_o_g_detector_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_detector_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_detector_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_gradient.js create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_gradient.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_gradient__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_gradient__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_gradient__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_multi_detection.js create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_multi_detection.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_multi_detection__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_multi_detection__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_multi_detection__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_orientation_binning_kernel.js create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_orientation_binning_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_orientation_binning_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_orientation_binning_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_h_o_g_orientation_binning_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_layer.js create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_layer.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_layer__coll__graph.map create mode 100644 
documentation/classarm__compute_1_1_c_l_locally_connected_layer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_layer_weights_reshape_kernel.js create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_layer_weights_reshape_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_layer_weights_reshape_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_layer_weights_reshape_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_layer_weights_reshape_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_matrix_multiply_kernel.js create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_matrix_multiply_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_matrix_multiply_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_matrix_multiply_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_locally_connected_matrix_multiply_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_multi_h_o_g.js create mode 100644 documentation/classarm__compute_1_1_c_l_multi_h_o_g.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_multi_h_o_g__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_multi_h_o_g__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_multi_h_o_g__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_sub_tensor.js create mode 100644 documentation/classarm__compute_1_1_c_l_sub_tensor.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_sub_tensor__coll__graph.map create mode 
100644 documentation/classarm__compute_1_1_c_l_sub_tensor__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_sub_tensor__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_l_weights_reshape_kernel.js create mode 100644 documentation/classarm__compute_1_1_c_l_weights_reshape_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_c_l_weights_reshape_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_l_weights_reshape_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_l_weights_reshape_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_p_p_detection_window_non_maxima_suppression_kernel.js rename documentation/{classarm__compute_1_1_n_e_h_o_g_non_maxima_suppression_kernel.xhtml => classarm__compute_1_1_c_p_p_detection_window_non_maxima_suppression_kernel.xhtml} (69%) create mode 100644 documentation/classarm__compute_1_1_c_p_p_detection_window_non_maxima_suppression_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_p_p_detection_window_non_maxima_suppression_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_p_p_detection_window_non_maxima_suppression_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_c_p_p_scheduler__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_c_p_p_scheduler__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_c_p_p_scheduler__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_i_c_l_h_o_g.js create mode 100644 documentation/classarm__compute_1_1_i_c_l_h_o_g.xhtml create mode 100644 documentation/classarm__compute_1_1_i_c_l_h_o_g__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_i_c_l_h_o_g__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_i_c_l_h_o_g__coll__graph.svg create mode 100644 
documentation/classarm__compute_1_1_i_c_l_multi_h_o_g.js create mode 100644 documentation/classarm__compute_1_1_i_c_l_multi_h_o_g.xhtml create mode 100644 documentation/classarm__compute_1_1_i_c_l_multi_h_o_g__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_i_c_l_multi_h_o_g__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_i_c_l_multi_h_o_g__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_i_scheduler.js create mode 100644 documentation/classarm__compute_1_1_i_scheduler.xhtml create mode 100644 documentation/classarm__compute_1_1_i_tensor_info.js create mode 100644 documentation/classarm__compute_1_1_i_tensor_info.xhtml create mode 100644 documentation/classarm__compute_1_1_n_e_batch_normalization_layer.js create mode 100644 documentation/classarm__compute_1_1_n_e_batch_normalization_layer.xhtml create mode 100644 documentation/classarm__compute_1_1_n_e_batch_normalization_layer__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_batch_normalization_layer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_batch_normalization_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_n_e_batch_normalization_layer_kernel.js create mode 100644 documentation/classarm__compute_1_1_n_e_batch_normalization_layer_kernel.xhtml rename documentation/{classarm__compute_1_1_n_e_h_o_g_non_maxima_suppression_kernel__coll__graph.map => classarm__compute_1_1_n_e_batch_normalization_layer_kernel__coll__graph.map} (62%) create mode 100644 documentation/classarm__compute_1_1_n_e_batch_normalization_layer_kernel__coll__graph.md5 rename documentation/{classarm__compute_1_1_n_e_h_o_g_non_maxima_suppression_kernel__coll__graph.svg => classarm__compute_1_1_n_e_batch_normalization_layer_kernel__coll__graph.svg} (50%) create mode 100644 documentation/classarm__compute_1_1_n_e_convolution_layer_reshape_weights.js create mode 100644 
documentation/classarm__compute_1_1_n_e_convolution_layer_reshape_weights.xhtml create mode 100644 documentation/classarm__compute_1_1_n_e_convolution_layer_reshape_weights__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_convolution_layer_reshape_weights__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_convolution_layer_reshape_weights__coll__graph.svg delete mode 100644 documentation/classarm__compute_1_1_n_e_convolution_layer_weights_reshape_kernel.js delete mode 100644 documentation/classarm__compute_1_1_n_e_convolution_layer_weights_reshape_kernel__coll__graph.md5 delete mode 100644 documentation/classarm__compute_1_1_n_e_convolution_layer_weights_reshape_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_n_e_depth_concatenate.js create mode 100644 documentation/classarm__compute_1_1_n_e_depth_concatenate.xhtml create mode 100644 documentation/classarm__compute_1_1_n_e_depth_concatenate__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_depth_concatenate__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_depth_concatenate__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_n_e_depth_concatenate_kernel.js create mode 100644 documentation/classarm__compute_1_1_n_e_depth_concatenate_kernel.xhtml rename documentation/{classarm__compute_1_1_n_e_convolution_layer_weights_reshape_kernel__coll__graph.map => classarm__compute_1_1_n_e_depth_concatenate_kernel__coll__graph.map} (60%) create mode 100644 documentation/classarm__compute_1_1_n_e_depth_concatenate_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_depth_concatenate_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer.js create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer.xhtml create mode 100644 
documentation/classarm__compute_1_1_n_e_direct_convolution_layer__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_bias_accumulate_kernel.js create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_bias_accumulate_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_bias_accumulate_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_bias_accumulate_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_bias_accumulate_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_kernel.js create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_direct_convolution_layer_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_n_e_fully_connected_layer_reshape_weights.js create mode 100644 documentation/classarm__compute_1_1_n_e_fully_connected_layer_reshape_weights.xhtml create mode 100644 documentation/classarm__compute_1_1_n_e_fully_connected_layer_reshape_weights__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_fully_connected_layer_reshape_weights__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_fully_connected_layer_reshape_weights__coll__graph.svg delete mode 100644 
documentation/classarm__compute_1_1_n_e_h_o_g_non_maxima_suppression_kernel.js delete mode 100644 documentation/classarm__compute_1_1_n_e_h_o_g_non_maxima_suppression_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_layer.js create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_layer.xhtml create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_layer__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_layer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_matrix_multiply_kernel.js create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_matrix_multiply_kernel.xhtml create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_matrix_multiply_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_matrix_multiply_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_locally_connected_matrix_multiply_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_n_e_weights_reshape_kernel.js rename documentation/{classarm__compute_1_1_n_e_convolution_layer_weights_reshape_kernel.xhtml => classarm__compute_1_1_n_e_weights_reshape_kernel.xhtml} (73%) create mode 100644 documentation/classarm__compute_1_1_n_e_weights_reshape_kernel__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_n_e_weights_reshape_kernel__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_n_e_weights_reshape_kernel__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_o_m_p_scheduler.js create mode 100644 documentation/classarm__compute_1_1_o_m_p_scheduler.xhtml create mode 100644 
documentation/classarm__compute_1_1_o_m_p_scheduler__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_o_m_p_scheduler__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_o_m_p_scheduler__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_scheduler.js create mode 100644 documentation/classarm__compute_1_1_scheduler.xhtml create mode 100644 documentation/classarm__compute_1_1_single_thread_scheduler.js create mode 100644 documentation/classarm__compute_1_1_single_thread_scheduler.xhtml create mode 100644 documentation/classarm__compute_1_1_single_thread_scheduler__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_single_thread_scheduler__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_single_thread_scheduler__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_sub_tensor.js create mode 100644 documentation/classarm__compute_1_1_sub_tensor.xhtml create mode 100644 documentation/classarm__compute_1_1_sub_tensor__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_sub_tensor__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_sub_tensor__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_sub_tensor_info.js create mode 100644 documentation/classarm__compute_1_1_sub_tensor_info.xhtml create mode 100644 documentation/classarm__compute_1_1_sub_tensor_info__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_sub_tensor_info__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_sub_tensor_info__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_tensor_info__coll__graph.map create mode 100644 documentation/classarm__compute_1_1_tensor_info__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1_tensor_info__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1_weights_info.js create mode 100644 
documentation/classarm__compute_1_1_weights_info.xhtml create mode 100644 documentation/classarm__compute_1_1detail_1_1compare__dimension.js create mode 100644 documentation/classarm__compute_1_1detail_1_1compare__dimension.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_functions.js create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_functions.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_functions__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_functions__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_functions__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_layer_data_object.js create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_layer_data_object.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_layer_data_object__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_layer_data_object__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_activation_layer_data_object__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_activation_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_activation_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_activation_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_activation_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_activation_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_convolution_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_convolution_layer_dataset.xhtml create mode 100644 
documentation/classarm__compute_1_1test_1_1_alex_net_convolution_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_convolution_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_convolution_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_fully_connected_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_fully_connected_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_fully_connected_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_fully_connected_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_fully_connected_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_normalization_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_normalization_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_normalization_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_normalization_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_normalization_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_pooling_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_pooling_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_pooling_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_pooling_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_alex_net_pooling_layer_dataset__coll__graph.svg create mode 
100644 documentation/classarm__compute_1_1test_1_1_all_data_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_all_data_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_all_data_types__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_all_data_types__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_all_data_types__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_batch_normalization_layer_data_object.js create mode 100644 documentation/classarm__compute_1_1test_1_1_batch_normalization_layer_data_object.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_batch_normalization_layer_data_object__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_batch_normalization_layer_data_object__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_batch_normalization_layer_data_object__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_border_modes.js create mode 100644 documentation/classarm__compute_1_1test_1_1_border_modes.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_data_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_data_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_data_types__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_data_types__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_data_types__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_fixed_point_data_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_fixed_point_data_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_fixed_point_data_types__coll__graph.map create mode 100644 
documentation/classarm__compute_1_1test_1_1_c_n_n_fixed_point_data_types__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_fixed_point_data_types__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_float_data_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_float_data_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_float_data_types__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_float_data_types__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_c_n_n_float_data_types__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_convert_policies.js create mode 100644 documentation/classarm__compute_1_1test_1_1_convert_policies.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_convolution_layer_data_object.js create mode 100644 documentation/classarm__compute_1_1test_1_1_convolution_layer_data_object.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_convolution_layer_data_object__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_convolution_layer_data_object__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_convolution_layer_data_object__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_data_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_data_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_direct_convolution_shapes.js create mode 100644 documentation/classarm__compute_1_1test_1_1_direct_convolution_shapes.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_direct_convolution_shapes__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_direct_convolution_shapes__coll__graph.md5 create mode 100644 
documentation/classarm__compute_1_1test_1_1_direct_convolution_shapes__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_fixed_point_data_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_fixed_point_data_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_fixed_point_data_types__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_fixed_point_data_types__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_fixed_point_data_types__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_float_data_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_float_data_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_float_data_types__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_float_data_types__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_float_data_types__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_fully_connected_layer_data_object.js create mode 100644 documentation/classarm__compute_1_1test_1_1_fully_connected_layer_data_object.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_fully_connected_layer_data_object__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_fully_connected_layer_data_object__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_fully_connected_layer_data_object__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_g_e_m_m_data_object.js create mode 100644 documentation/classarm__compute_1_1test_1_1_g_e_m_m_data_object.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_g_e_m_m_data_object__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_g_e_m_m_data_object__coll__graph.md5 create mode 100644 
documentation/classarm__compute_1_1test_1_1_g_e_m_m_data_object__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_generic_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_generic_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_activation_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_activation_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_activation_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_activation_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_activation_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset1.js create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset1.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset1__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset1__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset1__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset2.js create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset2.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset2__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset2__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_convolution_layer_dataset2__coll__graph.svg create mode 100644 
documentation/classarm__compute_1_1test_1_1_goog_le_net_fully_connected_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_fully_connected_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_fully_connected_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_fully_connected_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_fully_connected_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset1.js create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset1.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset1__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset1__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset1__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset2.js create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset2.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset2__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset2__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_g_e_m_m_dataset2__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_normalization_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_normalization_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_normalization_layer_dataset__coll__graph.map create mode 100644 
documentation/classarm__compute_1_1test_1_1_goog_le_net_normalization_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_normalization_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_pooling_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_pooling_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_pooling_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_pooling_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_goog_le_net_pooling_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_i_accessor.js create mode 100644 documentation/classarm__compute_1_1test_1_1_i_accessor.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_image_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_image_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_interpolation_policies.js create mode 100644 documentation/classarm__compute_1_1test_1_1_interpolation_policies.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_large_fully_connected_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_large_fully_connected_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_large_fully_connected_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_large_fully_connected_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_large_fully_connected_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_large_g_e_m_m_dataset.js create mode 100644 
documentation/classarm__compute_1_1test_1_1_large_g_e_m_m_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_large_g_e_m_m_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_large_g_e_m_m_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_large_g_e_m_m_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_large_images.js create mode 100644 documentation/classarm__compute_1_1test_1_1_large_images.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_large_images__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_large_images__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_large_images__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_large_shapes.js create mode 100644 documentation/classarm__compute_1_1test_1_1_large_shapes.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_large_shapes__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_large_shapes__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_large_shapes__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_activation_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_activation_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_activation_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_activation_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_activation_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_convolution_layer_dataset.js create mode 100644 
documentation/classarm__compute_1_1test_1_1_le_net5_convolution_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_convolution_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_convolution_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_convolution_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_fully_connected_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_fully_connected_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_fully_connected_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_fully_connected_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_fully_connected_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_pooling_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_pooling_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_pooling_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_pooling_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_le_net5_pooling_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_normalization_layer_data_object.js create mode 100644 documentation/classarm__compute_1_1test_1_1_normalization_layer_data_object.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_normalization_layer_data_object__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_normalization_layer_data_object__coll__graph.md5 create mode 100644 
documentation/classarm__compute_1_1test_1_1_normalization_layer_data_object__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_normalization_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_normalization_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_pooling_layer_data_object.js create mode 100644 documentation/classarm__compute_1_1test_1_1_pooling_layer_data_object.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_pooling_layer_data_object__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_pooling_layer_data_object__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_pooling_layer_data_object__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_program_options.js create mode 100644 documentation/classarm__compute_1_1test_1_1_program_options.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_random_batch_normalization_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_random_batch_normalization_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_random_batch_normalization_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_random_batch_normalization_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_random_batch_normalization_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_random_pooling_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_random_pooling_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_random_pooling_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_random_pooling_layer_dataset__coll__graph.md5 create mode 100644 
documentation/classarm__compute_1_1test_1_1_random_pooling_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_raw_tensor.js create mode 100644 documentation/classarm__compute_1_1test_1_1_raw_tensor.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_rounding_policies.js create mode 100644 documentation/classarm__compute_1_1test_1_1_rounding_policies.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_shape_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_shape_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_signed_data_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_signed_data_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_signed_data_types__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_signed_data_types__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_signed_data_types__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_small1_d_shape.js create mode 100644 documentation/classarm__compute_1_1test_1_1_small1_d_shape.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_small1_d_shape__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_small1_d_shape__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_small1_d_shape__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_small_convolution_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_small_convolution_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_small_convolution_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_small_convolution_layer_dataset__coll__graph.md5 create mode 100644 
documentation/classarm__compute_1_1test_1_1_small_convolution_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_small_fully_connected_layer_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_small_fully_connected_layer_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_small_fully_connected_layer_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_small_fully_connected_layer_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_small_fully_connected_layer_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_small_g_e_m_m_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_small_g_e_m_m_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_small_g_e_m_m_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_small_g_e_m_m_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_small_g_e_m_m_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_small_images.js create mode 100644 documentation/classarm__compute_1_1test_1_1_small_images.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_small_images__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_small_images__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_small_images__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_small_shapes.js create mode 100644 documentation/classarm__compute_1_1test_1_1_small_shapes.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_small_shapes__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_small_shapes__coll__graph.md5 create mode 100644 
documentation/classarm__compute_1_1test_1_1_small_shapes__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_tensor_cache.js create mode 100644 documentation/classarm__compute_1_1test_1_1_tensor_cache.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_tensor_library.js create mode 100644 documentation/classarm__compute_1_1test_1_1_tensor_library.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_threshold_data_object.js create mode 100644 documentation/classarm__compute_1_1test_1_1_threshold_data_object.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_threshold_dataset.js create mode 100644 documentation/classarm__compute_1_1test_1_1_threshold_dataset.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_threshold_dataset__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_threshold_dataset__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_threshold_dataset__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1_unsigned_data_types.js create mode 100644 documentation/classarm__compute_1_1test_1_1_unsigned_data_types.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1_unsigned_data_types__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1_unsigned_data_types__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1_unsigned_data_types__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_activation_layer.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_activation_layer.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_activation_layer__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_activation_layer__coll__graph.md5 create mode 100644 
documentation/classarm__compute_1_1test_1_1benchmark_1_1_activation_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_alex_net_fixture.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_alex_net_fixture.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_alex_net_fixture__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_alex_net_fixture__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_alex_net_fixture__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_convolution_layer.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_convolution_layer.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_convolution_layer__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_convolution_layer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_convolution_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_cycle_counter.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_cycle_counter.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_cycle_counter__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_cycle_counter__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_cycle_counter__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_fully_connected_layer.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_fully_connected_layer.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_fully_connected_layer__coll__graph.map create mode 100644 
documentation/classarm__compute_1_1test_1_1benchmark_1_1_fully_connected_layer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_fully_connected_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_g_e_m_m.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_g_e_m_m.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_g_e_m_m__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_g_e_m_m__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_g_e_m_m__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instruction_counter.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instruction_counter.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instruction_counter__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instruction_counter__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instruction_counter__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instrument.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instrument.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instrument_1_1_i_measurement.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instrument_1_1_i_measurement.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instrument_1_1_measurement.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instrument_1_1_measurement.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instrument_1_1_measurement__coll__graph.map create mode 100644 
documentation/classarm__compute_1_1test_1_1benchmark_1_1_instrument_1_1_measurement__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_instrument_1_1_measurement__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_le_net5_fixture.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_le_net5_fixture.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_le_net5_fixture__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_le_net5_fixture__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_le_net5_fixture__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_normalization_layer.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_normalization_layer.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_normalization_layer__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_normalization_layer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_normalization_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_pooling_layer.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_pooling_layer.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_pooling_layer__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_pooling_layer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_pooling_layer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_profiler.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_profiler.xhtml create mode 100644 
documentation/classarm__compute_1_1test_1_1benchmark_1_1_wall_clock_timer.js create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_wall_clock_timer.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_wall_clock_timer__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_wall_clock_timer__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1benchmark_1_1_wall_clock_timer__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1cl_1_1_c_l_accessor.js create mode 100644 documentation/classarm__compute_1_1test_1_1cl_1_1_c_l_accessor.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1cl_1_1_c_l_accessor__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1cl_1_1_c_l_accessor__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1cl_1_1_c_l_accessor__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1fixed__point__arithmetic_1_1fixed__point.js create mode 100644 documentation/classarm__compute_1_1test_1_1fixed__point__arithmetic_1_1fixed__point.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1model__objects_1_1_alex_net.js create mode 100644 documentation/classarm__compute_1_1test_1_1model__objects_1_1_alex_net.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1model__objects_1_1_le_net5.js create mode 100644 documentation/classarm__compute_1_1test_1_1model__objects_1_1_le_net5.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1neon_1_1_n_e_accessor.js create mode 100644 documentation/classarm__compute_1_1test_1_1neon_1_1_n_e_accessor.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1neon_1_1_n_e_accessor__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1neon_1_1_n_e_accessor__coll__graph.md5 create mode 100644 
documentation/classarm__compute_1_1test_1_1neon_1_1_n_e_accessor__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1performance_1_1_performance_program_options.js create mode 100644 documentation/classarm__compute_1_1test_1_1performance_1_1_performance_program_options.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1performance_1_1_performance_program_options__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1performance_1_1_performance_program_options__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1performance_1_1_performance_program_options__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_reference.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_reference_c_p_p.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_reference_c_p_p__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_reference_c_p_p__coll__graph.md5 create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_reference_c_p_p__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_tensor.js create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_tensor.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_tensor_factory.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_validation_program_options.js create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_validation_program_options.xhtml create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_validation_program_options__coll__graph.map create mode 100644 documentation/classarm__compute_1_1test_1_1validation_1_1_validation_program_options__coll__graph.md5 create mode 100644 
documentation/classarm__compute_1_1test_1_1validation_1_1_validation_program_options__coll__graph.svg create mode 100644 documentation/classarm__compute_1_1utils_1_1_p_p_m_loader.js create mode 100644 documentation/classarm__compute_1_1utils_1_1_p_p_m_loader.xhtml delete mode 100644 documentation/classtest__helpers_1_1_p_p_m_loader.js delete mode 100644 documentation/classtest__helpers_1_1_p_p_m_loader.xhtml create mode 100644 documentation/concatenate_8cl.js create mode 100644 documentation/concatenate_8cl.xhtml create mode 100644 documentation/concatenate_8cl_source.xhtml rename documentation/{dir_000004_000006.xhtml => dir_000004_000008.xhtml} (90%) rename documentation/{dir_000005_000006.xhtml => dir_000005_000008.xhtml} (89%) delete mode 100644 documentation/dir_000006_000007.xhtml create mode 100644 documentation/dir_000007_000000.xhtml rename documentation/{dir_000008_000000.xhtml => dir_000007_000035.xhtml} (59%) create mode 100644 documentation/dir_000007_000036.xhtml create mode 100644 documentation/dir_000007_000037.xhtml rename documentation/{dir_000006_000004.xhtml => dir_000008_000004.xhtml} (95%) rename documentation/{dir_000006_000005.xhtml => dir_000008_000005.xhtml} (94%) create mode 100644 documentation/dir_000008_000009.xhtml delete mode 100644 documentation/dir_000009_000001.xhtml rename documentation/{dir_000007_000004.xhtml => dir_000009_000004.xhtml} (94%) delete mode 100644 documentation/dir_000010_000011.xhtml create mode 100644 documentation/dir_000011_000000.xhtml delete mode 100644 documentation/dir_000011_000001.xhtml create mode 100644 documentation/dir_000012_000001.xhtml create mode 100644 documentation/dir_000012_000013.xhtml delete mode 100644 documentation/dir_000013_000014.xhtml delete mode 100644 documentation/dir_000014_000001.xhtml create mode 100644 documentation/dir_000015_000001.xhtml create mode 100644 documentation/dir_000015_000016.xhtml create mode 100644 documentation/dir_000016_000001.xhtml rename 
documentation/{dir_000015_000000.xhtml => dir_000018_000000.xhtml} (53%) rename documentation/{dir_000015_000008.xhtml => dir_000018_000011.xhtml} (70%) create mode 100644 documentation/dir_000023_000000.xhtml create mode 100644 documentation/dir_000023_000034.xhtml create mode 100644 documentation/dir_000023_000035.xhtml create mode 100644 documentation/dir_000023_000036.xhtml create mode 100644 documentation/dir_000023_000037.xhtml create mode 100644 documentation/dir_000024_000000.xhtml create mode 100644 documentation/dir_000024_000029.xhtml create mode 100644 documentation/dir_000024_000035.xhtml create mode 100644 documentation/dir_000024_000037.xhtml create mode 100644 documentation/dir_000025_000000.xhtml create mode 100644 documentation/dir_000025_000029.xhtml create mode 100644 documentation/dir_000025_000036.xhtml create mode 100644 documentation/dir_000025_000037.xhtml create mode 100644 documentation/dir_000026_000000.xhtml create mode 100644 documentation/dir_000026_000036.xhtml create mode 100644 documentation/dir_000026_000037.xhtml create mode 100644 documentation/dir_000027_000000.xhtml create mode 100644 documentation/dir_000027_000035.xhtml create mode 100644 documentation/dir_000027_000037.xhtml create mode 100644 documentation/dir_000028_000000.xhtml create mode 100644 documentation/dir_000028_000036.xhtml create mode 100644 documentation/dir_000028_000037.xhtml create mode 100644 documentation/dir_000029_000037.xhtml create mode 100644 documentation/dir_000030_000000.xhtml create mode 100644 documentation/dir_000030_000034.xhtml create mode 100644 documentation/dir_000030_000035.xhtml create mode 100644 documentation/dir_000030_000036.xhtml create mode 100644 documentation/dir_000031_000000.xhtml create mode 100644 documentation/dir_000031_000033.xhtml create mode 100644 documentation/dir_000031_000035.xhtml create mode 100644 documentation/dir_000032_000000.xhtml create mode 100644 documentation/dir_000032_000033.xhtml create mode 100644 
documentation/dir_000032_000036.xhtml create mode 100644 documentation/dir_000033_000034.xhtml create mode 100644 documentation/dir_000035_000000.xhtml create mode 100644 documentation/dir_000036_000000.xhtml create mode 100644 documentation/dir_000037_000000.xhtml create mode 100644 documentation/dir_000038_000037.xhtml create mode 100644 documentation/dir_000039_000000.xhtml create mode 100644 documentation/dir_000039_000036.xhtml create mode 100644 documentation/dir_000040_000000.xhtml create mode 100644 documentation/dir_0538a82dae0483a21d0f1e9d7e0a45c6.js create mode 100644 documentation/dir_0538a82dae0483a21d0f1e9d7e0a45c6.xhtml create mode 100644 documentation/dir_0538a82dae0483a21d0f1e9d7e0a45c6_dep.map create mode 100644 documentation/dir_0538a82dae0483a21d0f1e9d7e0a45c6_dep.md5 create mode 100644 documentation/dir_0538a82dae0483a21d0f1e9d7e0a45c6_dep.svg create mode 100644 documentation/dir_0b3c54e6965035dc3ca2dd424a2d1395.js create mode 100644 documentation/dir_0b3c54e6965035dc3ca2dd424a2d1395.xhtml create mode 100644 documentation/dir_0b3c54e6965035dc3ca2dd424a2d1395_dep.map create mode 100644 documentation/dir_0b3c54e6965035dc3ca2dd424a2d1395_dep.md5 create mode 100644 documentation/dir_0b3c54e6965035dc3ca2dd424a2d1395_dep.svg create mode 100644 documentation/dir_0c108762cda89b68d4571cbbabdf46df.js create mode 100644 documentation/dir_0c108762cda89b68d4571cbbabdf46df.xhtml create mode 100644 documentation/dir_0c108762cda89b68d4571cbbabdf46df_dep.map create mode 100644 documentation/dir_0c108762cda89b68d4571cbbabdf46df_dep.md5 create mode 100644 documentation/dir_0c108762cda89b68d4571cbbabdf46df_dep.svg create mode 100644 documentation/dir_1e190f7698f059a15072d193afa255c8.js create mode 100644 documentation/dir_1e190f7698f059a15072d193afa255c8.xhtml create mode 100644 documentation/dir_1e190f7698f059a15072d193afa255c8_dep.map create mode 100644 documentation/dir_1e190f7698f059a15072d193afa255c8_dep.md5 create mode 100644 
documentation/dir_1e190f7698f059a15072d193afa255c8_dep.svg create mode 100644 documentation/dir_23b0737c0060f5cc12150dca9a230e10.js create mode 100644 documentation/dir_23b0737c0060f5cc12150dca9a230e10.xhtml create mode 100644 documentation/dir_23b0737c0060f5cc12150dca9a230e10_dep.map create mode 100644 documentation/dir_23b0737c0060f5cc12150dca9a230e10_dep.md5 create mode 100644 documentation/dir_23b0737c0060f5cc12150dca9a230e10_dep.svg create mode 100644 documentation/dir_2661e87454575451859bd783c869d609.js create mode 100644 documentation/dir_2661e87454575451859bd783c869d609.xhtml create mode 100644 documentation/dir_2661e87454575451859bd783c869d609_dep.map create mode 100644 documentation/dir_2661e87454575451859bd783c869d609_dep.md5 create mode 100644 documentation/dir_2661e87454575451859bd783c869d609_dep.svg create mode 100644 documentation/dir_445738e6fed662d0b65e690e3f1e1de0.js create mode 100644 documentation/dir_445738e6fed662d0b65e690e3f1e1de0.xhtml create mode 100644 documentation/dir_445738e6fed662d0b65e690e3f1e1de0_dep.map create mode 100644 documentation/dir_445738e6fed662d0b65e690e3f1e1de0_dep.md5 create mode 100644 documentation/dir_445738e6fed662d0b65e690e3f1e1de0_dep.svg create mode 100644 documentation/dir_4da487531dc5bdc76b523c9e82b781b1.js create mode 100644 documentation/dir_4da487531dc5bdc76b523c9e82b781b1.xhtml create mode 100644 documentation/dir_4da487531dc5bdc76b523c9e82b781b1_dep.map create mode 100644 documentation/dir_4da487531dc5bdc76b523c9e82b781b1_dep.md5 create mode 100644 documentation/dir_4da487531dc5bdc76b523c9e82b781b1_dep.svg create mode 100644 documentation/dir_4f2df8950dc650bf7cf9176fae02facc.js create mode 100644 documentation/dir_4f2df8950dc650bf7cf9176fae02facc.xhtml create mode 100644 documentation/dir_4f2df8950dc650bf7cf9176fae02facc_dep.map create mode 100644 documentation/dir_4f2df8950dc650bf7cf9176fae02facc_dep.md5 create mode 100644 documentation/dir_4f2df8950dc650bf7cf9176fae02facc_dep.svg create mode 100644 
documentation/dir_59425e443f801f1f2fd8bbe4959a3ccf.js create mode 100644 documentation/dir_59425e443f801f1f2fd8bbe4959a3ccf.xhtml create mode 100644 documentation/dir_59425e443f801f1f2fd8bbe4959a3ccf_dep.map create mode 100644 documentation/dir_59425e443f801f1f2fd8bbe4959a3ccf_dep.md5 create mode 100644 documentation/dir_59425e443f801f1f2fd8bbe4959a3ccf_dep.svg create mode 100644 documentation/dir_5df60b4dac2e83e111e155bc5ea540f9.js create mode 100644 documentation/dir_5df60b4dac2e83e111e155bc5ea540f9.xhtml create mode 100644 documentation/dir_5df60b4dac2e83e111e155bc5ea540f9_dep.map create mode 100644 documentation/dir_5df60b4dac2e83e111e155bc5ea540f9_dep.md5 create mode 100644 documentation/dir_5df60b4dac2e83e111e155bc5ea540f9_dep.svg create mode 100644 documentation/dir_896191444cf08ec8ade6f88256cb2bdd.js create mode 100644 documentation/dir_896191444cf08ec8ade6f88256cb2bdd.xhtml create mode 100644 documentation/dir_896191444cf08ec8ade6f88256cb2bdd_dep.map create mode 100644 documentation/dir_896191444cf08ec8ade6f88256cb2bdd_dep.md5 create mode 100644 documentation/dir_896191444cf08ec8ade6f88256cb2bdd_dep.svg create mode 100644 documentation/dir_a133c6c93c82e8721b2f0a6d208afd33.js create mode 100644 documentation/dir_a133c6c93c82e8721b2f0a6d208afd33.xhtml create mode 100644 documentation/dir_a133c6c93c82e8721b2f0a6d208afd33_dep.map create mode 100644 documentation/dir_a133c6c93c82e8721b2f0a6d208afd33_dep.md5 create mode 100644 documentation/dir_a133c6c93c82e8721b2f0a6d208afd33_dep.svg create mode 100644 documentation/dir_b496024afd63bafbf4e8da80d7d3a81a_dep.map create mode 100644 documentation/dir_b496024afd63bafbf4e8da80d7d3a81a_dep.md5 create mode 100644 documentation/dir_b496024afd63bafbf4e8da80d7d3a81a_dep.svg create mode 100644 documentation/dir_b88a6d23833cc7e200916be8af5e3057.js create mode 100644 documentation/dir_b88a6d23833cc7e200916be8af5e3057.xhtml create mode 100644 documentation/dir_b88a6d23833cc7e200916be8af5e3057_dep.map create mode 100644 
documentation/dir_b88a6d23833cc7e200916be8af5e3057_dep.md5 create mode 100644 documentation/dir_b88a6d23833cc7e200916be8af5e3057_dep.svg create mode 100644 documentation/dir_ba1dc693ed461bc4c6534d3b34c816d0.js create mode 100644 documentation/dir_ba1dc693ed461bc4c6534d3b34c816d0.xhtml create mode 100644 documentation/dir_ba1dc693ed461bc4c6534d3b34c816d0_dep.map create mode 100644 documentation/dir_ba1dc693ed461bc4c6534d3b34c816d0_dep.md5 create mode 100644 documentation/dir_ba1dc693ed461bc4c6534d3b34c816d0_dep.svg delete mode 100644 documentation/dir_c1aaaab7f10e8303cab52138d50f8521.js delete mode 100644 documentation/dir_c1aaaab7f10e8303cab52138d50f8521_dep.map delete mode 100644 documentation/dir_c1aaaab7f10e8303cab52138d50f8521_dep.md5 create mode 100644 documentation/dir_cbdb8362360e11eafe2fa3bc74cf0ffd.js rename documentation/{dir_c1aaaab7f10e8303cab52138d50f8521.xhtml => dir_cbdb8362360e11eafe2fa3bc74cf0ffd.xhtml} (81%) create mode 100644 documentation/dir_cbdb8362360e11eafe2fa3bc74cf0ffd_dep.map create mode 100644 documentation/dir_cbdb8362360e11eafe2fa3bc74cf0ffd_dep.md5 rename documentation/{dir_c1aaaab7f10e8303cab52138d50f8521_dep.svg => dir_cbdb8362360e11eafe2fa3bc74cf0ffd_dep.svg} (67%) create mode 100644 documentation/dir_e384de53230b0fb8812f7ca98fdb0e65.js create mode 100644 documentation/dir_e384de53230b0fb8812f7ca98fdb0e65.xhtml create mode 100644 documentation/dir_e384de53230b0fb8812f7ca98fdb0e65_dep.map create mode 100644 documentation/dir_e384de53230b0fb8812f7ca98fdb0e65_dep.md5 create mode 100644 documentation/dir_e384de53230b0fb8812f7ca98fdb0e65_dep.svg create mode 100644 documentation/dir_e7c7b16542faa38cb4655ff1750d3604.js create mode 100644 documentation/dir_e7c7b16542faa38cb4655ff1750d3604.xhtml create mode 100644 documentation/dir_e7c7b16542faa38cb4655ff1750d3604_dep.map create mode 100644 documentation/dir_e7c7b16542faa38cb4655ff1750d3604_dep.md5 create mode 100644 documentation/dir_e7c7b16542faa38cb4655ff1750d3604_dep.svg create mode 
100644 documentation/dir_e92efd873d292d9829f03334575b107c.js create mode 100644 documentation/dir_e92efd873d292d9829f03334575b107c.xhtml create mode 100644 documentation/dir_e92efd873d292d9829f03334575b107c_dep.map create mode 100644 documentation/dir_e92efd873d292d9829f03334575b107c_dep.md5 create mode 100644 documentation/dir_e92efd873d292d9829f03334575b107c_dep.svg create mode 100644 documentation/dir_ec05701f68bea22653d08da5856c9ffc.js create mode 100644 documentation/dir_ec05701f68bea22653d08da5856c9ffc.xhtml create mode 100644 documentation/dir_ec05701f68bea22653d08da5856c9ffc_dep.map create mode 100644 documentation/dir_ec05701f68bea22653d08da5856c9ffc_dep.md5 create mode 100644 documentation/dir_ec05701f68bea22653d08da5856c9ffc_dep.svg create mode 100644 documentation/dir_f482a42326943e614a2ec303fd116962.js create mode 100644 documentation/dir_f482a42326943e614a2ec303fd116962.xhtml create mode 100644 documentation/dir_f482a42326943e614a2ec303fd116962_dep.map create mode 100644 documentation/dir_f482a42326943e614a2ec303fd116962_dep.md5 create mode 100644 documentation/dir_f482a42326943e614a2ec303fd116962_dep.svg create mode 100644 documentation/dir_f7024513cd67abef53e86ee9382ac5ce.js create mode 100644 documentation/dir_f7024513cd67abef53e86ee9382ac5ce.xhtml create mode 100644 documentation/dir_f7024513cd67abef53e86ee9382ac5ce_dep.map create mode 100644 documentation/dir_f7024513cd67abef53e86ee9382ac5ce_dep.md5 create mode 100644 documentation/dir_f7024513cd67abef53e86ee9382ac5ce_dep.svg create mode 100644 documentation/functions_eval.xhtml create mode 100644 documentation/functions_rela.xhtml create mode 100644 documentation/functions_type.xhtml create mode 100644 documentation/globals_a.xhtml create mode 100644 documentation/hog_8cl.xhtml create mode 100644 documentation/hog_8cl_source.xhtml create mode 100644 documentation/inherit_graph_100.map create mode 100644 documentation/inherit_graph_100.md5 create mode 100644 documentation/inherit_graph_100.svg 
create mode 100644 documentation/inherit_graph_101.map create mode 100644 documentation/inherit_graph_101.md5 create mode 100644 documentation/inherit_graph_101.svg create mode 100644 documentation/inherit_graph_102.map create mode 100644 documentation/inherit_graph_102.md5 create mode 100644 documentation/inherit_graph_102.svg create mode 100644 documentation/inherit_graph_103.map create mode 100644 documentation/inherit_graph_103.md5 create mode 100644 documentation/inherit_graph_103.svg create mode 100644 documentation/inherit_graph_104.map create mode 100644 documentation/inherit_graph_104.md5 create mode 100644 documentation/inherit_graph_104.svg create mode 100644 documentation/inherit_graph_105.map create mode 100644 documentation/inherit_graph_105.md5 create mode 100644 documentation/inherit_graph_105.svg create mode 100644 documentation/inherit_graph_106.map create mode 100644 documentation/inherit_graph_106.md5 create mode 100644 documentation/inherit_graph_106.svg create mode 100644 documentation/inherit_graph_107.map create mode 100644 documentation/inherit_graph_107.md5 create mode 100644 documentation/inherit_graph_107.svg create mode 100644 documentation/inherit_graph_108.map create mode 100644 documentation/inherit_graph_108.md5 create mode 100644 documentation/inherit_graph_108.svg create mode 100644 documentation/inherit_graph_109.map create mode 100644 documentation/inherit_graph_109.md5 create mode 100644 documentation/inherit_graph_109.svg create mode 100644 documentation/inherit_graph_110.map create mode 100644 documentation/inherit_graph_110.md5 create mode 100644 documentation/inherit_graph_110.svg create mode 100644 documentation/inherit_graph_111.map create mode 100644 documentation/inherit_graph_111.md5 create mode 100644 documentation/inherit_graph_111.svg create mode 100644 documentation/inherit_graph_112.map create mode 100644 documentation/inherit_graph_112.md5 create mode 100644 documentation/inherit_graph_112.svg create mode 100644 
documentation/inherit_graph_113.map create mode 100644 documentation/inherit_graph_113.md5 create mode 100644 documentation/inherit_graph_113.svg create mode 100644 documentation/inherit_graph_114.map create mode 100644 documentation/inherit_graph_114.md5 create mode 100644 documentation/inherit_graph_114.svg create mode 100644 documentation/inherit_graph_115.map create mode 100644 documentation/inherit_graph_115.md5 create mode 100644 documentation/inherit_graph_115.svg create mode 100644 documentation/inherit_graph_116.map create mode 100644 documentation/inherit_graph_116.md5 create mode 100644 documentation/inherit_graph_116.svg create mode 100644 documentation/inherit_graph_117.map create mode 100644 documentation/inherit_graph_117.md5 create mode 100644 documentation/inherit_graph_117.svg create mode 100644 documentation/inherit_graph_118.map create mode 100644 documentation/inherit_graph_118.md5 create mode 100644 documentation/inherit_graph_118.svg create mode 100644 documentation/inherit_graph_119.map create mode 100644 documentation/inherit_graph_119.md5 create mode 100644 documentation/inherit_graph_119.svg create mode 100644 documentation/inherit_graph_120.map create mode 100644 documentation/inherit_graph_120.md5 create mode 100644 documentation/inherit_graph_120.svg create mode 100644 documentation/inherit_graph_121.map create mode 100644 documentation/inherit_graph_121.md5 create mode 100644 documentation/inherit_graph_121.svg create mode 100644 documentation/inherit_graph_122.map create mode 100644 documentation/inherit_graph_122.md5 create mode 100644 documentation/inherit_graph_122.svg create mode 100644 documentation/inherit_graph_123.map create mode 100644 documentation/inherit_graph_123.md5 create mode 100644 documentation/inherit_graph_123.svg create mode 100644 documentation/inherit_graph_124.map create mode 100644 documentation/inherit_graph_124.md5 create mode 100644 documentation/inherit_graph_124.svg create mode 100644 
documentation/inherit_graph_125.map create mode 100644 documentation/inherit_graph_125.md5 create mode 100644 documentation/inherit_graph_125.svg create mode 100644 documentation/inherit_graph_126.map create mode 100644 documentation/inherit_graph_126.md5 create mode 100644 documentation/inherit_graph_126.svg create mode 100644 documentation/inherit_graph_127.map create mode 100644 documentation/inherit_graph_127.md5 create mode 100644 documentation/inherit_graph_127.svg create mode 100644 documentation/inherit_graph_128.map create mode 100644 documentation/inherit_graph_128.md5 create mode 100644 documentation/inherit_graph_128.svg create mode 100644 documentation/inherit_graph_129.map create mode 100644 documentation/inherit_graph_129.md5 create mode 100644 documentation/inherit_graph_129.svg create mode 100644 documentation/inherit_graph_130.map create mode 100644 documentation/inherit_graph_130.md5 create mode 100644 documentation/inherit_graph_130.svg create mode 100644 documentation/inherit_graph_131.map create mode 100644 documentation/inherit_graph_131.md5 create mode 100644 documentation/inherit_graph_131.svg create mode 100644 documentation/inherit_graph_132.map create mode 100644 documentation/inherit_graph_132.md5 create mode 100644 documentation/inherit_graph_132.svg create mode 100644 documentation/inherit_graph_133.map create mode 100644 documentation/inherit_graph_133.md5 create mode 100644 documentation/inherit_graph_133.svg create mode 100644 documentation/inherit_graph_134.map create mode 100644 documentation/inherit_graph_134.md5 create mode 100644 documentation/inherit_graph_134.svg create mode 100644 documentation/inherit_graph_135.map create mode 100644 documentation/inherit_graph_135.md5 create mode 100644 documentation/inherit_graph_135.svg create mode 100644 documentation/inherit_graph_136.map create mode 100644 documentation/inherit_graph_136.md5 create mode 100644 documentation/inherit_graph_136.svg create mode 100644 
documentation/inherit_graph_137.map create mode 100644 documentation/inherit_graph_137.md5 create mode 100644 documentation/inherit_graph_137.svg create mode 100644 documentation/inherit_graph_138.map create mode 100644 documentation/inherit_graph_138.md5 create mode 100644 documentation/inherit_graph_138.svg create mode 100644 documentation/inherit_graph_139.map create mode 100644 documentation/inherit_graph_139.md5 create mode 100644 documentation/inherit_graph_139.svg create mode 100644 documentation/inherit_graph_140.map create mode 100644 documentation/inherit_graph_140.md5 create mode 100644 documentation/inherit_graph_140.svg create mode 100644 documentation/inherit_graph_141.map create mode 100644 documentation/inherit_graph_141.md5 create mode 100644 documentation/inherit_graph_141.svg create mode 100644 documentation/inherit_graph_55.map create mode 100644 documentation/inherit_graph_55.md5 create mode 100644 documentation/inherit_graph_55.svg create mode 100644 documentation/inherit_graph_56.map create mode 100644 documentation/inherit_graph_56.md5 create mode 100644 documentation/inherit_graph_56.svg create mode 100644 documentation/inherit_graph_57.map create mode 100644 documentation/inherit_graph_57.md5 create mode 100644 documentation/inherit_graph_57.svg create mode 100644 documentation/inherit_graph_58.map create mode 100644 documentation/inherit_graph_58.md5 create mode 100644 documentation/inherit_graph_58.svg create mode 100644 documentation/inherit_graph_59.map create mode 100644 documentation/inherit_graph_59.md5 create mode 100644 documentation/inherit_graph_59.svg create mode 100644 documentation/inherit_graph_60.map create mode 100644 documentation/inherit_graph_60.md5 create mode 100644 documentation/inherit_graph_60.svg create mode 100644 documentation/inherit_graph_61.map create mode 100644 documentation/inherit_graph_61.md5 create mode 100644 documentation/inherit_graph_61.svg create mode 100644 documentation/inherit_graph_62.map 
create mode 100644 documentation/inherit_graph_62.md5 create mode 100644 documentation/inherit_graph_62.svg create mode 100644 documentation/inherit_graph_63.map create mode 100644 documentation/inherit_graph_63.md5 create mode 100644 documentation/inherit_graph_63.svg create mode 100644 documentation/inherit_graph_64.map create mode 100644 documentation/inherit_graph_64.md5 create mode 100644 documentation/inherit_graph_64.svg create mode 100644 documentation/inherit_graph_65.map create mode 100644 documentation/inherit_graph_65.md5 create mode 100644 documentation/inherit_graph_65.svg create mode 100644 documentation/inherit_graph_66.map create mode 100644 documentation/inherit_graph_66.md5 create mode 100644 documentation/inherit_graph_66.svg create mode 100644 documentation/inherit_graph_67.map create mode 100644 documentation/inherit_graph_67.md5 create mode 100644 documentation/inherit_graph_67.svg create mode 100644 documentation/inherit_graph_68.map create mode 100644 documentation/inherit_graph_68.md5 create mode 100644 documentation/inherit_graph_68.svg create mode 100644 documentation/inherit_graph_69.map create mode 100644 documentation/inherit_graph_69.md5 create mode 100644 documentation/inherit_graph_69.svg create mode 100644 documentation/inherit_graph_70.map create mode 100644 documentation/inherit_graph_70.md5 create mode 100644 documentation/inherit_graph_70.svg create mode 100644 documentation/inherit_graph_71.map create mode 100644 documentation/inherit_graph_71.md5 create mode 100644 documentation/inherit_graph_71.svg create mode 100644 documentation/inherit_graph_72.map create mode 100644 documentation/inherit_graph_72.md5 create mode 100644 documentation/inherit_graph_72.svg create mode 100644 documentation/inherit_graph_73.map create mode 100644 documentation/inherit_graph_73.md5 create mode 100644 documentation/inherit_graph_73.svg create mode 100644 documentation/inherit_graph_74.map create mode 100644 documentation/inherit_graph_74.md5 
create mode 100644 documentation/inherit_graph_74.svg create mode 100644 documentation/inherit_graph_75.map create mode 100644 documentation/inherit_graph_75.md5 create mode 100644 documentation/inherit_graph_75.svg create mode 100644 documentation/inherit_graph_76.map create mode 100644 documentation/inherit_graph_76.md5 create mode 100644 documentation/inherit_graph_76.svg create mode 100644 documentation/inherit_graph_77.map create mode 100644 documentation/inherit_graph_77.md5 create mode 100644 documentation/inherit_graph_77.svg create mode 100644 documentation/inherit_graph_78.map create mode 100644 documentation/inherit_graph_78.md5 create mode 100644 documentation/inherit_graph_78.svg create mode 100644 documentation/inherit_graph_79.map create mode 100644 documentation/inherit_graph_79.md5 create mode 100644 documentation/inherit_graph_79.svg create mode 100644 documentation/inherit_graph_80.map create mode 100644 documentation/inherit_graph_80.md5 create mode 100644 documentation/inherit_graph_80.svg create mode 100644 documentation/inherit_graph_81.map create mode 100644 documentation/inherit_graph_81.md5 create mode 100644 documentation/inherit_graph_81.svg create mode 100644 documentation/inherit_graph_82.map create mode 100644 documentation/inherit_graph_82.md5 create mode 100644 documentation/inherit_graph_82.svg create mode 100644 documentation/inherit_graph_83.map create mode 100644 documentation/inherit_graph_83.md5 create mode 100644 documentation/inherit_graph_83.svg create mode 100644 documentation/inherit_graph_84.map create mode 100644 documentation/inherit_graph_84.md5 create mode 100644 documentation/inherit_graph_84.svg create mode 100644 documentation/inherit_graph_85.map create mode 100644 documentation/inherit_graph_85.md5 create mode 100644 documentation/inherit_graph_85.svg create mode 100644 documentation/inherit_graph_86.map create mode 100644 documentation/inherit_graph_86.md5 create mode 100644 documentation/inherit_graph_86.svg 
create mode 100644 documentation/inherit_graph_87.map create mode 100644 documentation/inherit_graph_87.md5 create mode 100644 documentation/inherit_graph_87.svg create mode 100644 documentation/inherit_graph_88.map create mode 100644 documentation/inherit_graph_88.md5 create mode 100644 documentation/inherit_graph_88.svg create mode 100644 documentation/inherit_graph_89.map create mode 100644 documentation/inherit_graph_89.md5 create mode 100644 documentation/inherit_graph_89.svg create mode 100644 documentation/inherit_graph_90.map create mode 100644 documentation/inherit_graph_90.md5 create mode 100644 documentation/inherit_graph_90.svg create mode 100644 documentation/inherit_graph_91.map create mode 100644 documentation/inherit_graph_91.md5 create mode 100644 documentation/inherit_graph_91.svg create mode 100644 documentation/inherit_graph_92.map create mode 100644 documentation/inherit_graph_92.md5 create mode 100644 documentation/inherit_graph_92.svg create mode 100644 documentation/inherit_graph_93.map create mode 100644 documentation/inherit_graph_93.md5 create mode 100644 documentation/inherit_graph_93.svg create mode 100644 documentation/inherit_graph_94.map create mode 100644 documentation/inherit_graph_94.md5 create mode 100644 documentation/inherit_graph_94.svg create mode 100644 documentation/inherit_graph_95.map create mode 100644 documentation/inherit_graph_95.md5 create mode 100644 documentation/inherit_graph_95.svg create mode 100644 documentation/inherit_graph_96.map create mode 100644 documentation/inherit_graph_96.md5 create mode 100644 documentation/inherit_graph_96.svg create mode 100644 documentation/inherit_graph_97.map create mode 100644 documentation/inherit_graph_97.md5 create mode 100644 documentation/inherit_graph_97.svg create mode 100644 documentation/inherit_graph_98.map create mode 100644 documentation/inherit_graph_98.md5 create mode 100644 documentation/inherit_graph_98.svg create mode 100644 documentation/inherit_graph_99.map 
create mode 100644 documentation/inherit_graph_99.md5 create mode 100644 documentation/inherit_graph_99.svg create mode 100644 documentation/model__objects_2_alex_net_8h.xhtml create mode 100644 documentation/model__objects_2_alex_net_8h_source.xhtml create mode 100644 documentation/model__objects_2_le_net5_8h.xhtml create mode 100644 documentation/model__objects_2_le_net5_8h_source.xhtml create mode 100644 documentation/namespacearm__compute_1_1cpp14.js create mode 100644 documentation/namespacearm__compute_1_1detail.js create mode 100644 documentation/namespacearm__compute_1_1detail.xhtml create mode 100644 documentation/namespacearm__compute_1_1test.js create mode 100644 documentation/namespacearm__compute_1_1test.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1benchmark.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1benchmark.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1cl.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1cl.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1cpp11.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1cpp14.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1cpp14.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1fixed__point__arithmetic.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1fixed__point__arithmetic.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1fixed__point__arithmetic_1_1detail.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1fixed__point__arithmetic_1_1detail.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1model__objects.js create mode 100644 
documentation/namespacearm__compute_1_1test_1_1model__objects.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1neon.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1neon.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1performance.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1performance.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1traits.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1traits.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1validation.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1validation.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1validation_1_1cl.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1validation_1_1cl.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1validation_1_1tensor__operations.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1validation_1_1tensor__visitors.js create mode 100644 documentation/namespacearm__compute_1_1test_1_1validation_1_1tensor__visitors.xhtml create mode 100644 documentation/namespacearm__compute_1_1test_1_1validation_1_1vx.xhtml create mode 100644 documentation/namespacearm__compute_1_1traits.js create mode 100644 documentation/namespacearm__compute_1_1traits.xhtml create mode 100644 documentation/namespacearm__compute_1_1utils.js rename documentation/{namespacetest__helpers.xhtml => namespacearm__compute_1_1utils.xhtml} (51%) create mode 100644 documentation/namespaceboost.js create mode 100644 documentation/namespaceboost.xhtml create mode 100644 documentation/namespaceboost_1_1unit__test.js create mode 100644 documentation/namespaceboost_1_1unit__test.xhtml create mode 100644 documentation/namespaceboost_1_1unit__test_1_1data.js create mode 100644 documentation/namespaceboost_1_1unit__test_1_1data.xhtml create mode 100644 
documentation/namespaceboost_1_1unit__test_1_1data_1_1monomorphic.js create mode 100644 documentation/namespaceboost_1_1unit__test_1_1data_1_1monomorphic.xhtml create mode 100644 documentation/namespacemembers_b.xhtml create mode 100644 documentation/namespacemembers_c.xhtml create mode 100644 documentation/namespacemembers_d.xhtml create mode 100644 documentation/namespacemembers_dup.js create mode 100644 documentation/namespacemembers_e.xhtml create mode 100644 documentation/namespacemembers_f.xhtml create mode 100644 documentation/namespacemembers_func.js create mode 100644 documentation/namespacemembers_func_b.xhtml create mode 100644 documentation/namespacemembers_func_c.xhtml create mode 100644 documentation/namespacemembers_func_d.xhtml create mode 100644 documentation/namespacemembers_func_e.xhtml create mode 100644 documentation/namespacemembers_func_f.xhtml create mode 100644 documentation/namespacemembers_func_g.xhtml create mode 100644 documentation/namespacemembers_func_h.xhtml create mode 100644 documentation/namespacemembers_func_i.xhtml create mode 100644 documentation/namespacemembers_func_l.xhtml create mode 100644 documentation/namespacemembers_func_m.xhtml create mode 100644 documentation/namespacemembers_func_n.xhtml create mode 100644 documentation/namespacemembers_func_o.xhtml create mode 100644 documentation/namespacemembers_func_p.xhtml create mode 100644 documentation/namespacemembers_func_r.xhtml create mode 100644 documentation/namespacemembers_func_s.xhtml create mode 100644 documentation/namespacemembers_func_t.xhtml create mode 100644 documentation/namespacemembers_func_u.xhtml create mode 100644 documentation/namespacemembers_func_v.xhtml create mode 100644 documentation/namespacemembers_g.xhtml create mode 100644 documentation/namespacemembers_h.xhtml create mode 100644 documentation/namespacemembers_i.xhtml create mode 100644 documentation/namespacemembers_k.xhtml create mode 100644 documentation/namespacemembers_l.xhtml create 
mode 100644 documentation/namespacemembers_m.xhtml create mode 100644 documentation/namespacemembers_n.xhtml create mode 100644 documentation/namespacemembers_o.xhtml create mode 100644 documentation/namespacemembers_p.xhtml create mode 100644 documentation/namespacemembers_q.xhtml create mode 100644 documentation/namespacemembers_r.xhtml create mode 100644 documentation/namespacemembers_s.xhtml create mode 100644 documentation/namespacemembers_t.xhtml create mode 100644 documentation/namespacemembers_u.xhtml create mode 100644 documentation/namespacemembers_v.xhtml delete mode 100644 documentation/namespacetest__helpers.js create mode 100644 documentation/navtreeindex16.js create mode 100644 documentation/navtreeindex17.js create mode 100644 documentation/navtreeindex18.js create mode 100644 documentation/navtreeindex19.js create mode 100644 documentation/navtreeindex20.js create mode 100644 documentation/navtreeindex21.js create mode 100644 documentation/navtreeindex22.js create mode 100644 documentation/navtreeindex23.js create mode 100644 documentation/neon__cnn_8cpp.js create mode 100644 documentation/neon__cnn_8cpp.xhtml create mode 100644 documentation/neon__cnn_8cpp_source.xhtml rename documentation/{dir_000013_000012.xhtml => pages.xhtml} (79%) create mode 100644 documentation/search/all_1b.html create mode 100644 documentation/search/all_1b.js create mode 100644 documentation/search/classes_10.html create mode 100644 documentation/search/classes_10.js create mode 100644 documentation/search/classes_11.html create mode 100644 documentation/search/classes_11.js create mode 100644 documentation/search/classes_12.html create mode 100644 documentation/search/classes_12.js create mode 100644 documentation/search/classes_13.html create mode 100644 documentation/search/classes_13.js create mode 100644 documentation/search/classes_14.html create mode 100644 documentation/search/classes_14.js create mode 100644 documentation/search/classes_15.html create mode 
100644 documentation/search/classes_15.js create mode 100644 documentation/search/defines_10.html create mode 100644 documentation/search/defines_10.js create mode 100644 documentation/search/defines_11.html create mode 100644 documentation/search/defines_11.js create mode 100644 documentation/search/enums_c.html create mode 100644 documentation/search/enums_c.js create mode 100644 documentation/search/enums_d.html create mode 100644 documentation/search/enums_d.js create mode 100644 documentation/search/enumvalues_12.html create mode 100644 documentation/search/enumvalues_12.js create mode 100644 documentation/search/enumvalues_13.html create mode 100644 documentation/search/enumvalues_13.js create mode 100644 documentation/search/files_14.html create mode 100644 documentation/search/files_14.js create mode 100644 documentation/search/pages_1.html create mode 100644 documentation/search/pages_1.js create mode 100644 documentation/search/pages_2.html create mode 100644 documentation/search/pages_2.js create mode 100644 documentation/search/related_0.html create mode 100644 documentation/search/related_0.js create mode 100644 documentation/search/related_1.html create mode 100644 documentation/search/related_1.js create mode 100644 documentation/search/related_2.html create mode 100644 documentation/search/related_2.js create mode 100644 documentation/search/typedefs_10.html create mode 100644 documentation/search/typedefs_10.js create mode 100644 documentation/search/typedefs_11.html create mode 100644 documentation/search/typedefs_11.js create mode 100644 documentation/search/typedefs_d.html create mode 100644 documentation/search/typedefs_d.js create mode 100644 documentation/search/typedefs_e.html create mode 100644 documentation/search/typedefs_e.js create mode 100644 documentation/search/typedefs_f.html create mode 100644 documentation/search/typedefs_f.js create mode 100644 documentation/struct_detection_window.js create mode 100644 
documentation/struct_detection_window.xhtml create mode 100644 documentation/structarm__compute_1_1cpp14_1_1___unique__if.js create mode 100644 documentation/structarm__compute_1_1cpp14_1_1___unique__if.xhtml create mode 100644 documentation/structarm__compute_1_1cpp14_1_1___unique__if_3_01_t[]_4.js create mode 100644 documentation/structarm__compute_1_1cpp14_1_1___unique__if_3_01_t[]_4.xhtml create mode 100644 documentation/structarm__compute_1_1cpp14_1_1___unique__if_3_01_t[_n]_4.js create mode 100644 documentation/structarm__compute_1_1cpp14_1_1___unique__if_3_01_t[_n]_4.xhtml create mode 100644 documentation/structarm__compute_1_1enable__bitwise__ops.xhtml create mode 100644 documentation/structarm__compute_1_1enable__bitwise__ops_3_01arm__compute_1_1_g_p_u_target_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1_user_configuration.js create mode 100644 documentation/structarm__compute_1_1test_1_1_user_configuration.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1_user_configuration__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1_user_configuration__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1_user_configuration__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1common__promoted__signed__type.js create mode 100644 documentation/structarm__compute_1_1test_1_1common__promoted__signed__type.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1cpp14_1_1___unique__if.js create mode 100644 documentation/structarm__compute_1_1test_1_1cpp14_1_1___unique__if.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1cpp14_1_1___unique__if_3_01_t[]_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1cpp14_1_1___unique__if_3_01_t[]_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1cpp14_1_1___unique__if_3_01_t[_n]_4.js create mode 100644 
documentation/structarm__compute_1_1test_1_1cpp14_1_1___unique__if_3_01_t[_n]_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1detail_1_1constant__expr.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1detail_1_1functions.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01int16__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01int16__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01int32__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01int32__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01int64__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01int64__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01int8__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01int8__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01uint16__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01uint16__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01uint32__t_01_4.js create mode 100644 
documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01uint32__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01uint64__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01uint64__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01uint8__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1fixed__point__arithmetic_1_1traits_1_1promote_3_01uint8__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1performance_1_1_performance_user_configuration.js create mode 100644 documentation/structarm__compute_1_1test_1_1performance_1_1_performance_user_configuration.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1performance_1_1_performance_user_configuration__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1performance_1_1_performance_user_configuration__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1performance_1_1_performance_user_configuration__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01float_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01float_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01int16__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01int16__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01int32__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01int32__t_01_4.xhtml create mode 100644 
documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01int8__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01int8__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01uint16__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01uint16__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01uint32__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01uint32__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01uint8__t_01_4.js create mode 100644 documentation/structarm__compute_1_1test_1_1traits_1_1promote_3_01uint8__t_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1cl_1_1_c_l_fixture.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1cl_1_1_c_l_fixture.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1match__const.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1match__const.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1absolute__difference__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1absolute__difference__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1absolute__difference__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1absolute__difference__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1absolute__difference__visitor__coll__graph.svg create mode 100644 
documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1activation__layer__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1activation__layer__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1activation__layer__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1activation__layer__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1activation__layer__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__addition__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__addition__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__addition__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__addition__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__addition__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__subtraction__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__subtraction__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__subtraction__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__subtraction__visitor__coll__graph.md5 create mode 100644 
documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1arithmetic__subtraction__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1batch__normalization__layer__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1batch__normalization__layer__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1batch__normalization__layer__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1batch__normalization__layer__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1batch__normalization__layer__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1convolution__layer__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1convolution__layer__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1convolution__layer__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1convolution__layer__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1convolution__layer__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1depth__convert__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1depth__convert__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1depth__convert__visitor__coll__graph.map create mode 100644 
documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1depth__convert__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1depth__convert__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__operation__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__operation__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__operation__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__operation__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__operation__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__pixel__wise__multiplication__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__pixel__wise__multiplication__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__pixel__wise__multiplication__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__pixel__wise__multiplication__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fixed__point__pixel__wise__multiplication__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fully__connected__layer__visitor.js create mode 100644 
documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fully__connected__layer__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fully__connected__layer__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fully__connected__layer__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1fully__connected__layer__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1gemm__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1gemm__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1gemm__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1gemm__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1gemm__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1normalization__layer__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1normalization__layer__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1normalization__layer__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1normalization__layer__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1normalization__layer__visitor__coll__graph.svg create mode 100644 
documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pixel__wise__multiplication__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pixel__wise__multiplication__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pixel__wise__multiplication__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pixel__wise__multiplication__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pixel__wise__multiplication__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pooling__layer__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pooling__layer__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pooling__layer__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pooling__layer__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1pooling__layer__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1print__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1print__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1print__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1print__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1print__visitor__coll__graph.svg 
create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1softmax__layer__visitor.js create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1softmax__layer__visitor.xhtml create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1softmax__layer__visitor__coll__graph.map create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1softmax__layer__visitor__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1test_1_1validation_1_1tensor__visitors_1_1softmax__layer__visitor__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained.xhtml create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_01_t_00_01_ts_8_8_8_01_4_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_01_t_00_01_ts_8_8_8_01_4_01_4__coll__graph.map create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_01_t_00_01_ts_8_8_8_01_4_01_4__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_01_t_00_01_ts_8_8_8_01_4_01_4__coll__graph.svg create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_01_u_00_01_ts_8_8_8_01_4_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_01_u_00_01_ts_8_8_8_01_4_01_4__coll__graph.map create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_01_u_00_01_ts_8_8_8_01_4_01_4__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_01_u_00_01_ts_8_8_8_01_4_01_4__coll__graph.svg create mode 100644 
documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_4_01_4.xhtml create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_4_01_4__coll__graph.map create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_4_01_4__coll__graph.md5 create mode 100644 documentation/structarm__compute_1_1traits_1_1is__contained_3_01_t_00_01std_1_1tuple_3_4_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_actc083718f2a45800f245b4789496ba62a.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_actc083718f2a45800f245b4789496ba62a.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_actc083718f2a45800f245b4789496ba62a.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_activation_functions_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ale08494eac301fe59801c94a1e49323f42.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ale1c097a9fbdfd984a6c1fe1b85dace92a.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ale1c097a9fbdfd984a6c1fe1b85dace92a.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ale1c097a9fbdfd984a6c1fe1b85dace92a.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ale353bc52507aa43f06dbf95e757937ea3.xhtml create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ale3666c604179e8c5e7b449c9783241da0.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ale3666c604179e8c5e7b449c9783241da0.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ale3666c604179e8c5e7b449c9783241da0.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_all_data_types_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_all_data_types_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_all_data_types_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_all_data_types_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_border_modes_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_border_modes_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_border_modes_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_border_modes_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_n654a9ddc0be25ce9f53741a765f23cfb.map create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_n654a9ddc0be25ce9f53741a765f23cfb.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_n654a9ddc0be25ce9f53741a765f23cfb.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_n_n_data_types_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_n_n_data_types_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_n_n_data_types_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_n_n_data_types_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_n_n_fixed_point_data_types_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_n_n_float_data_types_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_nacf05d9e09b2edcfed642c23987f92f6.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_nacf05d9e09b2edcfed642c23987f92f6.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_c_nacf05d9e09b2edcfed642c23987f92f6.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_convert_policies_01_4.xhtml create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_convert_policies_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_convert_policies_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_convert_policies_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_dir618d2b8632fb2a14b2b8ad932f29c702.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_dir618d2b8632fb2a14b2b8ad932f29c702.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_dir618d2b8632fb2a14b2b8ad932f29c702.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_direct_convolution_shapes_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_fix894a7ec222bfe52ff9657f209e49bbb3.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_fix894a7ec222bfe52ff9657f209e49bbb3.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_fix894a7ec222bfe52ff9657f209e49bbb3.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_fixed_point_data_types_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_float_data_types_01_4.xhtml create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_float_data_types_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_float_data_types_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_float_data_types_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_intc1c44e2478649e95c09e2cce657a0700.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_intc1c44e2478649e95c09e2cce657a0700.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_intc1c44e2478649e95c09e2cce657a0700.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_interpolation_policies_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_lar3e88d2c425acaee0299fc505fb789c24.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_lar3e88d2c425acaee0299fc505fb789c24.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_lar3e88d2c425acaee0299fc505fb789c24.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_lar54e71ecc3aab9d0a8e146092477da42b.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_lar54e71ecc3aab9d0a8e146092477da42b.md5 create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_lar54e71ecc3aab9d0a8e146092477da42b.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_larb321d1e758b5a8ba2357d45c9fb79214.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_large_g_e_m_m_dataset_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_large_images_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_large_images_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_large_images_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_large_images_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_large_shapes_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_large_shapes_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_large_shapes_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_large_shapes_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_nor40ab218f8dce317d6fb9026633e97dfb.map create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_nor40ab218f8dce317d6fb9026633e97dfb.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_nor40ab218f8dce317d6fb9026633e97dfb.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_normalization_types_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ran0c09af4ee7a64edb9be5e86462d7cfee.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ran7977c2cddcca626085649dfc89fd0d79.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ran7977c2cddcca626085649dfc89fd0d79.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ran7977c2cddcca626085649dfc89fd0d79.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ranacdaaefe63ce7ff5e3c1fbdb3c2d1461.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ranacdaaefe63ce7ff5e3c1fbdb3c2d1461.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_ranacdaaefe63ce7ff5e3c1fbdb3c2d1461.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_random_pooling_layer_dataset_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_rou6d6a54280c694766b800dca4a14ecd03.map create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_rou6d6a54280c694766b800dca4a14ecd03.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_rou6d6a54280c694766b800dca4a14ecd03.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_rounding_policies_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sige8903bc485cfa5e8edbf1f41e67f7e95.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sige8903bc485cfa5e8edbf1f41e67f7e95.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sige8903bc485cfa5e8edbf1f41e67f7e95.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_signed_data_types_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sma37aa36c611469959a5228d982ba942dd.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sma62b3eab748b476484e57e35656c730cf.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sma62b3eab748b476484e57e35656c730cf.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sma62b3eab748b476484e57e35656c730cf.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sma68a6ec428c610323abd025da83fb53f1.map create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sma68a6ec428c610323abd025da83fb53f1.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sma68a6ec428c610323abd025da83fb53f1.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_sma7e005b651d0eb6b1236ae8f2d63b33e2.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_smafcd58fcc57b0091cc3ea3edcccbccf01.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_smafcd58fcc57b0091cc3ea3edcccbccf01.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_smafcd58fcc57b0091cc3ea3edcccbccf01.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small1_d_shape_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small1_d_shape_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small1_d_shape_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small1_d_shape_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small_g_e_m_m_dataset_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small_images_01_4.xhtml create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small_images_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small_images_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small_images_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small_shapes_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small_shapes_01_4__coll__graph.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small_shapes_01_4__coll__graph.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_small_shapes_01_4__coll__graph.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_thr20a06b6ec29decfcd86dcbb31db5a7cc.map create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_thr20a06b6ec29decfcd86dcbb31db5a7cc.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_thr20a06b6ec29decfcd86dcbb31db5a7cc.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_threshold_dataset_01_4.xhtml create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_uns857987c56fcf4ace7a0307984bdb5675.map create mode 100644 
documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_uns857987c56fcf4ace7a0307984bdb5675.md5 create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_uns857987c56fcf4ace7a0307984bdb5675.svg create mode 100644 documentation/structboost_1_1unit__test_1_1data_1_1monomorphic_1_1is__dataset_3_01arm__compute_1_1test_1_1_unsigned_data_types_01_4.xhtml delete mode 100644 documentation/test__helpers_2_utils_8h.js delete mode 100644 documentation/test__helpers_2_utils_8h_source.xhtml create mode 100644 documentation/tests.xhtml create mode 100644 documentation/tests_2_types_8h.js create mode 100644 documentation/tests_2_types_8h.xhtml create mode 100644 documentation/tests_2_types_8h_source.xhtml create mode 100644 documentation/tests_2_utils_8h.js create mode 100644 documentation/tests_2_utils_8h.xhtml create mode 100644 documentation/tests_2_utils_8h_source.xhtml create mode 100644 documentation/tests_2validation_2_fixed_point_8h.js create mode 100644 documentation/tests_2validation_2_fixed_point_8h.xhtml create mode 100644 documentation/tests_2validation_2_fixed_point_8h_source.xhtml create mode 100644 documentation/tests_2validation_2_helpers_8h.js create mode 100644 documentation/tests_2validation_2_helpers_8h.xhtml create mode 100644 documentation/tests_2validation_2_helpers_8h_source.xhtml create mode 100644 documentation/tests_2validation_2_tensor_8h.xhtml create mode 100644 documentation/tests_2validation_2_tensor_8h_source.xhtml create mode 100644 documentation/tests_2validation_2_u_n_i_t_2_utils_8cpp.xhtml create mode 100644 documentation/tests_2validation_2_u_n_i_t_2_utils_8cpp_source.xhtml create mode 100644 documentation/utils_2_utils_8cpp.js create mode 100644 documentation/utils_2_utils_8cpp.xhtml create mode 100644 documentation/utils_2_utils_8cpp_source.xhtml create mode 100644 documentation/utils_2_utils_8h.js rename 
documentation/{test__helpers_2_utils_8h.xhtml => utils_2_utils_8h.xhtml} (69%) create mode 100644 documentation/utils_2_utils_8h_source.xhtml create mode 100644 documentation/validation_2_c_l_2_bitwise_and_8cpp.xhtml create mode 100644 documentation/validation_2_c_l_2_bitwise_and_8cpp_source.xhtml create mode 100644 documentation/validation_2_datasets_8h.xhtml create mode 100644 documentation/validation_2_datasets_8h_source.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_activation_layer_8cpp.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_activation_layer_8cpp_source.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_bitwise_and_8cpp.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_bitwise_and_8cpp_source.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_convolution_layer_8cpp.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_convolution_layer_8cpp_source.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_convolution_layer_direct_8cpp.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_convolution_layer_direct_8cpp_source.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_fully_connected_layer_8cpp.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_fully_connected_layer_8cpp_source.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_g_e_m_m_8cpp.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_g_e_m_m_8cpp_source.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_normalization_layer_8cpp.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_normalization_layer_8cpp_source.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_pooling_2_pooling_layer_8cpp.xhtml create mode 100644 documentation/validation_2_n_e_o_n_2_pooling_2_pooling_layer_8cpp_source.xhtml create mode 100644 documentation/validation_2main_8cpp.js create mode 100644 documentation/validation_2main_8cpp.xhtml create mode 100644 
documentation/validation_2main_8cpp_source.xhtml create mode 100644 examples/SConscript create mode 100644 examples/neon_cnn.cpp create mode 100644 opencl-1.2-stubs/SConscript delete mode 100644 opencl-1.2-stubs/sconscript delete mode 100644 sconscript create mode 100644 src/core/CL/ICLHOG.cpp create mode 100644 src/core/CL/ICLMultiHOG.cpp create mode 100644 src/core/CL/cl_kernels/batchnormalization_layer.cl create mode 100644 src/core/CL/cl_kernels/concatenate.cl create mode 100644 src/core/CL/cl_kernels/hog.cl create mode 100644 src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp create mode 100644 src/core/CL/kernels/CLDepthConcatenateKernel.cpp create mode 100644 src/core/CL/kernels/CLHOGDescriptorKernel.cpp create mode 100644 src/core/CL/kernels/CLHOGDetectorKernel.cpp create mode 100644 src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp rename src/core/CL/kernels/{CLConvolutionLayerWeightsReshapeKernel.cpp => CLWeightsReshapeKernel.cpp} (56%) rename src/core/{NEON/kernels/NEHOGNonMaximaSuppressionKernel.cpp => CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp} (85%) create mode 100644 src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp delete mode 100644 src/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.cpp create mode 100644 src/core/NEON/kernels/NEDepthConcatenateKernel.cpp create mode 100644 src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp create mode 100644 src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp create mode 100644 src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp create mode 100644 src/core/NEON/kernels/NEWeightsReshapeKernel.cpp create mode 100644 src/core/SubTensorInfo.cpp create mode 100644 src/runtime/CL/CLHOG.cpp create mode 100644 src/runtime/CL/CLMultiHOG.cpp create mode 100644 src/runtime/CL/CLSubTensor.cpp create mode 100644 src/runtime/CL/functions/CLBatchNormalizationLayer.cpp create mode 100644 src/runtime/CL/functions/CLDepthConcatenate.cpp 
create mode 100644 src/runtime/CL/functions/CLHOGDescriptor.cpp create mode 100644 src/runtime/CL/functions/CLHOGDetector.cpp create mode 100644 src/runtime/CL/functions/CLHOGGradient.cpp create mode 100644 src/runtime/CL/functions/CLHOGMultiDetection.cpp create mode 100644 src/runtime/CL/functions/CLLocallyConnectedLayer.cpp create mode 100644 src/runtime/CPP/SingleThreadScheduler.cpp create mode 100644 src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp create mode 100644 src/runtime/NEON/functions/NEDepthConcatenate.cpp create mode 100644 src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp create mode 100644 src/runtime/NEON/functions/NELocallyConnectedLayer.cpp create mode 100644 src/runtime/OMP/OMPScheduler.cpp create mode 100644 src/runtime/Scheduler.cpp create mode 100644 src/runtime/SubTensor.cpp create mode 100644 src/runtime/Utils.cpp create mode 100644 tests/CL/CLAccessor.h create mode 100644 tests/CL/Helper.h create mode 100644 tests/Globals.h create mode 100644 tests/IAccessor.h create mode 100644 tests/NEON/Helper.h create mode 100644 tests/NEON/NEAccessor.h create mode 100644 tests/ProgramOptions.cpp create mode 100644 tests/ProgramOptions.h create mode 100644 tests/RawTensor.cpp create mode 100644 tests/RawTensor.h create mode 100644 tests/SConscript create mode 100644 tests/SConscript.orig create mode 100644 tests/TensorCache.h create mode 100644 tests/TensorLibrary.cpp create mode 100644 tests/TensorLibrary.h create mode 100644 tests/TypePrinter.h create mode 100644 tests/TypeReader.h create mode 100644 tests/Types.h create mode 100644 tests/UserConfiguration.cpp create mode 100644 tests/UserConfiguration.h create mode 100644 tests/Utils.h create mode 100644 tests/benchmark/CL/ActivationLayer.cpp create mode 100644 tests/benchmark/CL/BitwiseAnd.cpp create mode 100644 tests/benchmark/CL/ConvolutionLayer.cpp create mode 100644 tests/benchmark/CL/FullyConnectedLayer.cpp create mode 100644 tests/benchmark/CL/GEMM.cpp create mode 100644 
tests/benchmark/CL/GEMM.h create mode 100644 tests/benchmark/CL/NormalizationLayer.cpp create mode 100644 tests/benchmark/CL/PoolingLayer.cpp create mode 100644 tests/benchmark/Datasets.h create mode 100644 tests/benchmark/Instrument.h create mode 100644 tests/benchmark/NEON/ActivationLayer.cpp create mode 100644 tests/benchmark/NEON/BitwiseAnd.cpp create mode 100644 tests/benchmark/NEON/ConvolutionLayer.cpp create mode 100644 tests/benchmark/NEON/ConvolutionLayerDirect.cpp create mode 100644 tests/benchmark/NEON/FullyConnectedLayer.cpp create mode 100644 tests/benchmark/NEON/GEMM.cpp create mode 100644 tests/benchmark/NEON/GEMM.h create mode 100644 tests/benchmark/NEON/NormalizationLayer.cpp create mode 100644 tests/benchmark/NEON/PoolingLayer.cpp create mode 100644 tests/benchmark/PMUCounter.cpp create mode 100644 tests/benchmark/PMUCounter.h create mode 100644 tests/benchmark/PerformanceProgramOptions.cpp create mode 100644 tests/benchmark/PerformanceProgramOptions.h create mode 100644 tests/benchmark/PerformanceUserConfiguration.cpp create mode 100644 tests/benchmark/PerformanceUserConfiguration.h create mode 100644 tests/benchmark/Profiler.cpp create mode 100644 tests/benchmark/Profiler.h create mode 100644 tests/benchmark/WallClockTimer.cpp create mode 100644 tests/benchmark/WallClockTimer.h create mode 100644 tests/benchmark/common/ActivationLayer.h create mode 100644 tests/benchmark/common/ConvolutionLayer.h create mode 100644 tests/benchmark/common/FullyConnectedLayer.h create mode 100644 tests/benchmark/common/NormalizationLayer.h create mode 100644 tests/benchmark/common/PoolingLayer.h create mode 100644 tests/benchmark/main.cpp create mode 100644 tests/benchmark/system_tests/CL/AlexNet.cpp create mode 100644 tests/benchmark/system_tests/CL/LeNet5.cpp create mode 100644 tests/benchmark/system_tests/NEON/AlexNet.cpp create mode 100644 tests/benchmark/system_tests/NEON/LeNet5.cpp create mode 100644 tests/benchmark/system_tests/common/AlexNet.h create mode 
100644 tests/benchmark/system_tests/common/LeNet5.h create mode 100644 tests/boost_wrapper.h create mode 100644 tests/dataset/ActivationFunctionDataset.h create mode 100644 tests/dataset/ActivationLayerDataset.h create mode 100644 tests/dataset/BatchNormalizationLayerDataset.h create mode 100644 tests/dataset/BorderModeDataset.h create mode 100644 tests/dataset/ConvertPolicyDataset.h create mode 100644 tests/dataset/ConvolutionLayerDataset.h create mode 100644 tests/dataset/DataTypeDatasets.h create mode 100644 tests/dataset/FullyConnectedLayerDataset.h create mode 100644 tests/dataset/GEMMDataset.h create mode 100644 tests/dataset/GenericDataset.h create mode 100644 tests/dataset/ImageDatasets.h create mode 100644 tests/dataset/InterpolationPolicyDataset.h create mode 100644 tests/dataset/NormalizationLayerDataset.h create mode 100644 tests/dataset/NormalizationTypeDataset.h create mode 100644 tests/dataset/PoolingLayerDataset.h create mode 100644 tests/dataset/RoundingPolicyDataset.h create mode 100644 tests/dataset/ShapeDatasets.h create mode 100644 tests/dataset/ThresholdDataset.h create mode 100644 tests/model_objects/AlexNet.h create mode 100644 tests/model_objects/LeNet5.h create mode 100644 tests/validation/CL/BitwiseAnd.cpp create mode 100644 tests/validation/CL/CLFixture.cpp create mode 100644 tests/validation/CL/CLFixture.h create mode 100644 tests/validation/CL/DepthConvert.cpp create mode 100644 tests/validation/CL/FillBorder.cpp create mode 100644 tests/validation/CL/Threshold.cpp create mode 100644 tests/validation/Datasets.h create mode 100644 tests/validation/FixedPoint.h create mode 100644 tests/validation/Helpers.h create mode 100644 tests/validation/NEON/AbsoluteDifference.cpp create mode 100644 tests/validation/NEON/Accumulate.cpp create mode 100644 tests/validation/NEON/AccumulateSquared.cpp create mode 100644 tests/validation/NEON/AccumulateWeighted.cpp create mode 100644 tests/validation/NEON/ActivationLayer.cpp create mode 100644 
tests/validation/NEON/ArithmeticAddition.cpp create mode 100644 tests/validation/NEON/ArithmeticSubtraction.cpp create mode 100644 tests/validation/NEON/BatchNormalizationLayer.cpp create mode 100644 tests/validation/NEON/BitwiseAnd.cpp create mode 100644 tests/validation/NEON/BitwiseNot.cpp create mode 100644 tests/validation/NEON/BitwiseOr.cpp create mode 100644 tests/validation/NEON/BitwiseXor.cpp create mode 100644 tests/validation/NEON/Box3x3.cpp create mode 100644 tests/validation/NEON/ConvolutionLayer.cpp create mode 100644 tests/validation/NEON/ConvolutionLayerDirect.cpp create mode 100644 tests/validation/NEON/DepthConvert.cpp create mode 100644 tests/validation/NEON/FillBorder.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Exp_QS8.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Log_QS8.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp create mode 100644 tests/validation/NEON/FullyConnectedLayer.cpp create mode 100644 tests/validation/NEON/GEMM.cpp create mode 100644 tests/validation/NEON/IntegralImage.cpp create mode 100644 tests/validation/NEON/NormalizationLayer.cpp create mode 100644 tests/validation/NEON/PixelWiseMultiplication.cpp create mode 100644 tests/validation/NEON/Pooling/PoolingLayer.cpp create mode 100644 tests/validation/NEON/SoftmaxLayer.cpp create mode 100644 tests/validation/NEON/Threshold.cpp create mode 100644 tests/validation/Reference.cpp create mode 100644 tests/validation/Reference.h create mode 100644 tests/validation/ReferenceCPP.cpp create mode 100644 tests/validation/ReferenceCPP.h create mode 100644 tests/validation/Tensor.h create mode 100644 tests/validation/TensorFactory.h create mode 100644 tests/validation/TensorOperations.h create mode 100644 tests/validation/TensorVisitors.h create mode 100644 tests/validation/UNIT/FixedPoint.cpp create mode 100644 tests/validation/UNIT/TensorInfo.cpp create mode 100644 
tests/validation/UNIT/TensorShape.cpp create mode 100644 tests/validation/UNIT/Utils.cpp create mode 100644 tests/validation/VX/DepthConvert.cpp create mode 100644 tests/validation/VX/VXHelpers.h create mode 100644 tests/validation/Validation.cpp create mode 100644 tests/validation/Validation.h create mode 100644 tests/validation/ValidationProgramOptions.cpp create mode 100644 tests/validation/ValidationProgramOptions.h create mode 100644 tests/validation/ValidationUserConfiguration.h create mode 100644 tests/validation/main.cpp rename {test_helpers => utils}/Utils.cpp (92%) rename {test_helpers => utils}/Utils.h (98%) diff --git a/README.md b/README.md index d3a2aa5..92cf139 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,16 @@ Please report issues here: https://github.com/ARM-software/ComputeLibrary/issues -Documentation available here: +Documentation available here: +- [v17.06](https://arm-software.github.io/ComputeLibrary/v17.06/) - [v17.05](https://arm-software.github.io/ComputeLibrary/v17.05/) - [v17.04](https://arm-software.github.io/ComputeLibrary/v17.04/) - [v17.03.1](https://arm-software.github.io/ComputeLibrary/v17.03.1/) -Binaries available here: +Binaries available here: +- [v17.06](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.06/arm_compute-v17.06-bin.tar.gz) - [v17.05](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.05/arm_compute-v17.05-bin.tar.gz) - [v17.04](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.04/arm_compute-v17.04-bin.tar.gz) - [v17.03.1](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.03.1/arm_compute-v17.03.1-bin.tar.gz) diff --git a/SConscript b/SConscript new file mode 100644 index 0000000..fa967eb --- /dev/null +++ b/SConscript @@ -0,0 +1,199 @@ +# Copyright (c) 2016, 2017 ARM Limited. 
+# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+import collections +import os.path +import re +import subprocess + +VERSION = "v17.06" +SONAME_VERSION="3.0.0" + +Import('env') +Import('vars') + +def build_library(name, sources, static=False): + if static: + obj = arm_compute_env.StaticLibrary(name, source=sources) + else: + if env['set_soname']: + obj = arm_compute_env.SharedLibrary(name, source=sources, SHLIBVERSION = SONAME_VERSION) + + symlinks = [] + # Manually delete symlinks or SCons will get confused: + directory = os.path.dirname(obj[0].path) + library_prefix = obj[0].path[:-(1 + len(SONAME_VERSION))] + real_lib = "%s.%s" % (library_prefix, SONAME_VERSION) + + for f in Glob("#%s*" % library_prefix): + if str(f) != real_lib: + symlinks.append("%s/%s" % (directory,str(f))) + + clean = arm_compute_env.Command('clean-%s' % str(obj[0]), [], Delete(symlinks)) + Default(clean) + Depends(obj, clean) + else: + obj = arm_compute_env.SharedLibrary(name, source=sources) + + Default(obj) + return obj + +def resolve_includes(target, source, env): + # File collection + FileEntry = collections.namedtuple('FileEntry', 'target_name file_contents') + + # Include pattern + pattern = re.compile("#include \"(.*)\"") + + # Get file contents + files = [] + for i in range(len(source)): + src = source[i] + dst = target[i] + contents = src.get_contents().splitlines() + entry = FileEntry(target_name=dst, file_contents=contents) + files.append((os.path.basename(src.get_path()),entry)) + + # Create dictionary of tupled list + files_dict = dict(files) + + # Check for includes (can only be files in the same folder) + final_files = [] + for file in files: + done = False + tmp_file = file[1].file_contents + while not done: + file_count = 0 + updated_file = [] + for line in tmp_file: + found = pattern.search(line) + if found: + include_file = found.group(1) + data = files_dict[include_file].file_contents + updated_file.extend(data) + else: + updated_file.append(line) + file_count += 1 + + # Check if all include are replaced. 
+ if file_count == len(tmp_file): + done = True + + # Update temp file + tmp_file = updated_file + + # Append and prepend string literal identifiers and add expanded file to final list + tmp_file.insert(0, "R\"(\n") + tmp_file.append("\n)\"") + entry = FileEntry(target_name=file[1].target_name, file_contents=tmp_file) + final_files.append((file[0], entry)) + + # Write output files + for file in final_files: + with open(file[1].target_name.get_path(), 'w+') as out_file: + out_file.write( "\n".join( file[1].file_contents )) + +def create_version_file(target, source, env): +# Generate string with build options library version to embed in the library: + try: + git_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]) + except (OSError, subprocess.CalledProcessError): + git_hash="unknown" + + version_filename = "%s/arm_compute_version.embed" % Dir("src/core").path + build_info = "\"arm_compute_version=%s Build options: %s Git hash=%s\"" % (VERSION, vars.args, git_hash.strip()) + with open(target[0].get_path(), "w") as fd: + fd.write(build_info) + + +arm_compute_env = env.Clone() + +generate_embed = [ arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file) ] +arm_compute_env.Append(CPPPATH =[Dir("./src/core/").path] ) + +if env["os"] not in ["android", "bare_metal"]: + arm_compute_env.Append(LIBS = ['pthread']) + +arm_compute_env.Append(LIBS = ['dl']) + +core_files = Glob('src/core/*.cpp') +core_files += Glob('src/core/CPP/*.cpp') +core_files += Glob('src/core/CPP/kernels/*.cpp') + +files = Glob('src/runtime/*.cpp') +# CLHarrisCorners uses the Scheduler to run CPP kernels +files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp') + +if env['cppthreads']: + files += Glob('src/runtime/CPP/CPPScheduler.cpp') + +if env['openmp']: + files += Glob('src/runtime/OMP/OMPScheduler.cpp') + +if env['opencl']: + core_files += Glob('src/core/CL/*.cpp') + core_files += Glob('src/core/CL/kernels/*.cpp') + + files += 
Glob('src/runtime/CL/*.cpp') + files += Glob('src/runtime/CL/functions/*.cpp') + + # Generate embed files + if env['embed_kernels']: + cl_files = Glob('src/core/CL/cl_kernels/*.cl') + cl_files += Glob('src/core/CL/cl_kernels/*.h') + + embed_files = [ f.get_path()+"embed" for f in cl_files ] + arm_compute_env.Append(CPPPATH =[Dir("./src/core/CL/").path] ) + + generate_embed.append(arm_compute_env.Command(embed_files, cl_files, action=resolve_includes)) + +if env['neon']: + core_files += Glob('src/core/NEON/*.cpp') + core_files += Glob('src/core/NEON/kernels/*.cpp') + + files += Glob('src/runtime/NEON/*.cpp') + files += Glob('src/runtime/NEON/functions/*.cpp') + +static_core_objects = [arm_compute_env.StaticObject(f) for f in core_files] +shared_core_objects = [arm_compute_env.SharedObject(f) for f in core_files] + +arm_compute_core_a = build_library('arm_compute_core-static', static_core_objects, static=True) +Export('arm_compute_core_a') + +if env['os'] != 'bare_metal': + arm_compute_core_so = build_library('arm_compute_core', shared_core_objects, static=False) + Export('arm_compute_core_so') + +shared_objects = [arm_compute_env.SharedObject(f) for f in files] +static_objects = [arm_compute_env.StaticObject(f) for f in files] + +arm_compute_a = build_library('arm_compute-static', static_core_objects + static_objects, static=True) +Export('arm_compute_a') + +if env['os'] != 'bare_metal': + arm_compute_so = build_library('arm_compute', shared_core_objects + shared_objects, static=False) + Export('arm_compute_so') + +alias = arm_compute_env.Alias("arm_compute", [arm_compute_a, arm_compute_so]) +Default(alias) + +Default(generate_embed) +Depends([alias,arm_compute_core_so, arm_compute_core_a], generate_embed) diff --git a/SConstruct b/SConstruct index 862eec7..3927e3a 100644 --- a/SConstruct +++ b/SConstruct @@ -20,7 +20,20 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+import SCons import os +import subprocess + +def version_at_least(version, required): + end = min(len(version), len(required)) + + for i in range(0, end, 2): + if int(version[i]) < int(required[i]): + return False + elif int(version[i]) > int(required[i]): + return True + + return True vars = Variables("scons") vars.AddVariables( @@ -29,7 +42,7 @@ vars.AddVariables( EnumVariable("arch", "Target Architecture", "armv7a", allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "x86_32", "x86_64")), EnumVariable("os", "Target OS", "linux", allowed_values=("linux", "android", "bare_metal")), EnumVariable("build", "Build type", "cross_compile", allowed_values=("native", "cross_compile")), - BoolVariable("examples", "Build example programs", False), + BoolVariable("examples", "Build example programs", True), BoolVariable("Werror", "Enable/disable the -Werror compilation flag", True), BoolVariable("opencl", "Enable OpenCL support", True), BoolVariable("neon", "Enable Neon support", False), @@ -37,16 +50,159 @@ vars.AddVariables( BoolVariable("set_soname", "Set the library's soname and shlibversion (requires SCons 2.4 or above)", False), BoolVariable("openmp", "Enable OpenMP backend", False), BoolVariable("cppthreads", "Enable C++11 threads backend", True), - PathVariable("build_dir", "Specify sub-folder for the build", ".", PathVariable.PathIsDirCreate), + PathVariable("build_dir", "Specify sub-folder for the build", ".", PathVariable.PathAccept), ("extra_cxx_flags", "Extra CXX flags to be appended to the build command", "") ) -env = Environment(platform='posix', variables = vars, ENV = os.environ) +env = Environment(platform="posix", variables=vars, ENV = os.environ) + +SConsignFile('build/.%s' % env['build_dir']) Help(vars.GenerateHelpText(env)) +if env['neon'] and 'x86' in env['arch']: + print "Cannot compile NEON for x86" + Exit(1) + +if env['set_soname'] and not version_at_least(SCons.__version__, "2.4"): + print "Setting the library's SONAME / SHLIBVERSION requires 
SCons 2.4 or above" + print "Update your version of SCons or use set_soname=0" + Exit(1) + +if env['os'] == 'bare_metal': + if env['cppthreads'] or env['openmp']: + print("ERROR: OpenMP and C++11 threads not supported in bare_metal. Use cppthreads=0 openmp=0") + Exit(1) + +env.Append(CXXFLAGS = ['-Wno-deprecated-declarations','-Wall','-DARCH_ARM', + '-Wextra','-Wno-unused-parameter','-pedantic','-Wdisabled-optimization','-Wformat=2', + '-Winit-self','-Wstrict-overflow=2','-Wswitch-default', + '-fpermissive','-std=gnu++11','-Wno-vla','-Woverloaded-virtual', + '-Wctor-dtor-privacy','-Wsign-promo','-Weffc++','-Wno-format-nonliteral','-Wno-overlength-strings','-Wno-strict-overflow']) +env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP']) + +if os.environ.get('CXX', 'g++') == 'clang++': + env.Append(CXXFLAGS = ['-Wno-format-nonliteral','-Wno-deprecated-increment-bool','-Wno-vla-extension','-Wno-mismatched-tags']) +else: + env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel']) + +if env['cppthreads']: + env.Append(CPPDEFINES = [('ARM_COMPUTE_CPP_SCHEDULER', 1)]) + +if env['openmp']: + if os.environ.get('CXX', 'g++') == 'clang++': + print "Clang does not support OpenMP. Use scheduler=cpp." 
+ Exit(1) + + env.Append(CPPDEFINES = [('ARM_COMPUTE_OPENMP_SCHEDULER', 1)]) + env.Append(CXXFLAGS = ['-fopenmp']) + env.Append(LINKFLAGS = ['-fopenmp']) + +prefix = "" +if env['arch'] == 'armv7a': + env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon']) + + if env['os'] in ['linux', 'bare_metal']: + prefix = "arm-linux-gnueabihf-" + env.Append(CXXFLAGS = ['-mfloat-abi=hard']) + elif env['os'] == 'android': + prefix = "arm-linux-androideabi-" + env.Append(CXXFLAGS = ['-mfloat-abi=softfp']) +elif env['arch'] == 'arm64-v8a': + env.Append(CXXFLAGS = ['-march=armv8-a']) + + if env['os'] in ['linux', 'bare_metal']: + prefix = "aarch64-linux-gnu-" + elif env['os'] == 'android': + prefix = "aarch64-linux-android-" +elif env['arch'] == 'arm64-v8.2-a': + env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16+simd']) + env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16']) + + if env['os'] in ['linux', 'bare_metal']: + prefix = "aarch64-linux-gnu-" + elif env['os'] == 'android': + prefix = "aarch64-linux-android-" +elif env['arch'] == 'x86_32': + env.Append(CCFLAGS = ['-m32']) + env.Append(LINKFLAGS = ['-m32']) +elif env['arch'] == 'x86_64': + env.Append(CCFLAGS = ['-m64']) + env.Append(LINKFLAGS = ['-m64']) + +if env['build'] == 'native': + prefix = "" + +env['CC'] = prefix + os.environ.get('CC', 'gcc') +env['CXX'] = prefix + os.environ.get('CXX', 'g++') +env['LD'] = prefix + "ld" +env['AS'] = prefix + "as" +env['AR'] = prefix + "ar" +env['RANLIB'] = prefix + "ranlib" + +if not GetOption("help"): + try: + compiler_ver = subprocess.check_output([env['CXX'], "-dumpversion"]).strip() + except OSError: + print("ERROR: Compiler '%s' not found" % env['CXX']) + Exit(1) + + if os.environ.get('CXX','g++') == 'g++': + if env['arch'] == 'arm64-v8.2-a' and not version_at_least(compiler_ver, '6.2.1'): + print "GCC 6.2.1 or newer is required to compile armv8.2-a code" + Exit(1) + elif env['arch'] == 'arm64-v8a' and not version_at_least(compiler_ver, '4.9'): + print "GCC 4.9 or newer is 
required to compile NEON code for AArch64" + Exit(1) + + if version_at_least(compiler_ver, '6.1'): + env.Append(CXXFLAGS = ['-Wno-ignored-attributes']) + + if compiler_ver == '4.8.3': + env.Append(CXXFLAGS = ['-Wno-array-bounds']) + +if env['Werror']: + env.Append(CXXFLAGS = ['-Werror']) + +if env['os'] == 'android': + env.Append(CPPDEFINES = ['ANDROID']) + env.Append(LINKFLAGS = ['-pie', '-static-libstdc++']) +elif env['os'] == 'bare_metal': + env.Append(LINKFLAGS = ['-static']) + env.Append(CXXFLAGS = ['-fPIC']) + env.Append(CPPDEFINES = ['NO_MULTI_THREADING']) + +if env['opencl']: + if env['os'] == 'bare_metal': + print("Cannot link OpenCL statically, which is required on bare metal") + Exit(1) + + if env['embed_kernels']: + env.Append(CPPDEFINES = ['EMBEDDED_KERNELS']) + +if env['debug']: + env['asserts'] = True + env.Append(CXXFLAGS = ['-O0','-g','-gdwarf-2']) + env.Append(CPPDEFINES = ['ARM_COMPUTE_DEBUG_ENABLED']) +else: + env.Append(CXXFLAGS = ['-O3','-ftree-vectorize']) + +if env['asserts']: + env.Append(CPPDEFINES = ['ARM_COMPUTE_ASSERTS_ENABLED']) + +env.Append(CPPPATH = ['#/include', "#"]) +env.Append(CXXFLAGS = env['extra_cxx_flags']) + Export('vars') Export('env') +Export('version_at_least') -if not GetOption("help"): - SConscript('sconscript', variant_dir='#build/%s/arm_compute' % env['build_dir'], duplicate=0) +SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate=0) + +if env['opencl']: + SConscript("./opencl-1.2-stubs/SConscript", variant_dir="build/%s/opencl-1.2-stubs" % env['build_dir'], duplicate=0) + +if env['examples']: + SConscript('./examples/SConscript', variant_dir='#build/%s/examples' % env['build_dir'], duplicate=0) + +SConscript('./tests/SConscript', variant_dir='#build/%s/tests' % env['build_dir'], duplicate=0) diff --git a/arm_compute/core/AccessWindowAutoPadding.h b/arm_compute/core/AccessWindowAutoPadding.h index ef058bc..0a3344b 100644 --- a/arm_compute/core/AccessWindowAutoPadding.h +++ 
b/arm_compute/core/AccessWindowAutoPadding.h @@ -32,7 +32,7 @@ namespace arm_compute { class Window; -class TensorInfo; +class ITensorInfo; /** Dummy access window. * @@ -51,7 +51,7 @@ public: * * @param[in,out] info Tensor info of the accessed kernel. */ - AccessWindowAutoPadding(TensorInfo *info); + AccessWindowAutoPadding(ITensorInfo *info); AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete; AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete; AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default; @@ -70,7 +70,7 @@ public: ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; private: - TensorInfo *_info; + ITensorInfo *_info; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__*/ diff --git a/arm_compute/core/AccessWindowStatic.h b/arm_compute/core/AccessWindowStatic.h index 9c26998..6dcba07 100644 --- a/arm_compute/core/AccessWindowStatic.h +++ b/arm_compute/core/AccessWindowStatic.h @@ -34,7 +34,7 @@ namespace arm_compute { class Window; -class TensorInfo; +class ITensorInfo; /** Implementation of a static rectangular access pattern. * @@ -54,7 +54,7 @@ public: * @param[in] end_x End of the access in X direction. * @param[in] end_y End of the access in Y direction. 
*/ - AccessWindowStatic(TensorInfo *info, int start_x, int start_y, int end_x, int end_y); + AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y); AccessWindowStatic(const AccessWindowStatic &) = delete; AccessWindowStatic &operator=(const AccessWindowStatic &) = delete; @@ -82,11 +82,11 @@ public: bool update_padding_if_needed(const Window &window) const override; ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; - TensorInfo *_info; - int _start_x; - int _start_y; - int _end_x; - int _end_y; + ITensorInfo *_info; + int _start_x; + int _start_y; + int _end_x; + int _end_y; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__*/ diff --git a/arm_compute/core/AccessWindowTranspose.h b/arm_compute/core/AccessWindowTranspose.h index 4276503..102860f 100644 --- a/arm_compute/core/AccessWindowTranspose.h +++ b/arm_compute/core/AccessWindowTranspose.h @@ -32,7 +32,7 @@ namespace arm_compute { class Window; -class TensorInfo; +class ITensorInfo; /** Implementation of a XY-transpose access pattern. 
*/ class AccessWindowTranspose : public AccessWindowRectangle diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index 230685c..26253e3 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -24,11 +24,22 @@ #ifndef __ARM_COMPUTE_CLHELPERS_H__ #define __ARM_COMPUTE_CLHELPERS_H__ +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Helpers.h" + #include namespace arm_compute { enum class DataType; +enum class GPUTarget; + +/** Enable operation operations on GPUTarget enumerations */ +template <> +struct enable_bitwise_ops +{ + static constexpr bool value = true; +}; /** Max vector width of an OpenCL vector */ static constexpr const unsigned int max_cl_vector_width = 16; @@ -40,5 +51,55 @@ static constexpr const unsigned int max_cl_vector_width = 16; * @return The string specifying the OpenCL type to be used. */ std::string get_cl_type_from_data_type(const DataType &dt); + +/** Translates a given gpu device target to string. + * + * @param[in] target Given gpu target. + * + * @return The string describing the target. + */ +const std::string &string_from_target(GPUTarget target); + +/** Helper function to create and return a unique_ptr pointed to a CL kernel object + * It also calls the kernel's configuration. + * + * @param[in] args All the arguments that need pass to kernel's configuration. + * + * @return A unique pointer pointed to a CL kernel object + */ +template +std::unique_ptr create_configure_kernel(T &&... 
args) +{ + std::unique_ptr k = arm_compute::cpp14::make_unique(); + k->configure(std::forward(args)...); + return k; +} + +/** Helper function to create and return a unique_ptr pointed to a CL kernel object + * + * @return A unique pointer pointed to a CL kernel object + */ +template +std::unique_ptr create_kernel() +{ + std::unique_ptr k = arm_compute::cpp14::make_unique(); + return k; +} + +/** Helper function to get the GPU target from CL device + * + * @param[in] device A CL device + * + * @return the GPU target + */ +GPUTarget get_target_from_device(cl::Device &device); + +/** Helper function to get the GPU arch + * + * @param[in] target GPU target + * + * @return the GPU target which shows the arch + */ +GPUTarget get_arch_from_target(GPUTarget target); } #endif diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index 70789b2..0e9f356 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -30,6 +30,7 @@ #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h" #include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h" +#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" #include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h" #include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h" #include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h" @@ -41,7 +42,7 @@ #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" #include "arm_compute/core/CL/kernels/CLColorConvertKernel.h" #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" -#include "arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h" #include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h" #include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" #include "arm_compute/core/CL/kernels/CLDilateKernel.h" @@ -62,6 +63,7 @@ #include 
"arm_compute/core/CL/kernels/CLIm2ColKernel.h" #include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" #include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" +#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" #include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" #include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h" @@ -83,5 +85,6 @@ #include "arm_compute/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" #include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h" +#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" #endif /* __ARM_COMPUTE_CLKERNELS_H__ */ diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h new file mode 100644 index 0000000..c5643d8 --- /dev/null +++ b/arm_compute/core/CL/CLTypes.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CL_TYPES_H__ +#define __ARM_COMPUTE_CL_TYPES_H__ + +namespace arm_compute +{ +/** Available GPU Targets */ +enum class GPUTarget +{ + GPU_ARCH_MASK = 0xF00, + MIDGARD = 0x100, + BIFROST = 0x200, + T600 = 0x110, + T700 = 0x120, + T800 = 0x130, + G70 = 0x210 +}; +} +#endif /* __ARM_COMPUTE_CL_TYPES_H__ */ diff --git a/arm_compute/core/CL/ICLHOG.h b/arm_compute/core/CL/ICLHOG.h new file mode 100644 index 0000000..a3d2fb4 --- /dev/null +++ b/arm_compute/core/CL/ICLHOG.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ICLHOG_H__ +#define __ARM_COMPUTE_ICLHOG_H__ + +#include "arm_compute/core/IHOG.h" + +#include + +namespace cl +{ +class Buffer; +class CommandQueue; +} + +namespace arm_compute +{ +/** Interface for OpenCL HOG data-object */ +class ICLHOG : public IHOG +{ +public: + /** Default constructor */ + ICLHOG(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLHOG(const ICLHOG &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLHOG &operator=(const ICLHOG &) = delete; + /** Allow instances of this class to be moved */ + ICLHOG(ICLHOG &&) = default; + /** Allow instances of this class to be moved */ + ICLHOG &operator=(ICLHOG &&) = default; + /** Default destructor */ + virtual ~ICLHOG() = default; + + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the hog's descriptor + * + * @return A reference to an OpenCL buffer containing the hog's descriptor + */ + virtual const cl::Buffer &cl_buffer() const = 0; + + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + void map(cl::CommandQueue &q, bool blocking = true); + + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. 
+ */ + void unmap(cl::CommandQueue &q); + + /** Interface to be implemented by the child class to free the allocated cl buffer. + * + * @warning The buffer must have been allocated previously. Otherwise calling the function will fail. + */ + virtual void free() = 0; + + // Inherited methods overridden: + float *descriptor() const override; + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. 
+ */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +private: + uint8_t *_mapping; +}; +} +#endif /*__ARM_COMPUTE_ICLHOG_H__ */ diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index f2cbb2b..72c963d 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_ICLKERNEL_H__ #define __ARM_COMPUTE_ICLKERNEL_H__ +#include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/IKernel.h" @@ -98,6 +99,24 @@ public: _kernel.setArg(idx++, value); } + /** Set the targeted GPU architecture + * + * @param[in] target The targeted GPU architecture + */ + void set_target(GPUTarget target); + + /** Set the targeted GPU architecture according to the CL device + * + * @param[in] device A CL device + */ + void set_target(cl::Device &device); + + /** Get the targeted GPU architecture + * + * @return The targeted GPU architecture. + */ + GPUTarget get_target() const; + private: /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. * @@ -117,6 +136,7 @@ private: protected: cl::Kernel _kernel; /**< OpenCL kernel to run */ cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ + GPUTarget _target; /**< The targeted GPU */ }; /** Add the kernel to the command queue with the given window. diff --git a/arm_compute/core/CL/ICLMultiHOG.h b/arm_compute/core/CL/ICLMultiHOG.h new file mode 100644 index 0000000..9f3c775 --- /dev/null +++ b/arm_compute/core/CL/ICLMultiHOG.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLMULTIHOG_H__ +#define __ARM_COMPUTE_ICLMULTIHOG_H__ + +#include "arm_compute/core/CL/ICLHOG.h" +#include "arm_compute/core/IMultiHOG.h" + +namespace arm_compute +{ +/** Interface for storing multiple HOG data-objects */ +class ICLMultiHOG : public IMultiHOG +{ +public: + /** Return a pointer to the requested OpenCL HOG model + * + * @param[in] index The index of the wanted OpenCL HOG model. + * + * @return A pointer pointed to the HOG model + */ + virtual ICLHOG *cl_model(size_t index) = 0; + /** Return a constant pointer to the requested OpenCL HOG model + * + * @param[in] index The index of the wanted OpenCL HOG model. 
+ * + * @return A constant pointer pointed to the OpenCL HOG model + */ + virtual const ICLHOG *cl_model(size_t index) const = 0; + + // Inherited methods overridden: + IHOG *model(size_t index) override; + const IHOG *model(size_t index) const override; +}; +} +#endif /*__ARM_COMPUTE_ICLMULTIHOG_H__ */ diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h index 887d31f..490e705 100644 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -24,14 +24,14 @@ #ifndef __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ #define __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ -#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/CL/ICLSimple3DKernel.h" namespace arm_compute { class ICLTensor; /** Interface for the activation layer kernel. */ -class CLActivationLayerKernel : public ICLSimple2DKernel +class CLActivationLayerKernel : public ICLSimple3DKernel { public: /** Set the input and output tensor. diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h new file mode 100644 index 0000000..0888538 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the BatchNormalization layer kernel. + */ +class CLBatchNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. Data types supported: F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * The rest are optional and used for representing batches. + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_mean; + const ICLTensor *_var; + const ICLTensor *_beta; + const ICLTensor *_gamma; + float _epsilon; +}; +} +#endif /*__ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h new file mode 100644 index 0000000..eda4c66 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class CLDepthConcatenateKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthConcatenateKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthConcatenateKernel(const CLDepthConcatenateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthConcatenateKernel &operator=(const CLDepthConcatenateKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthConcatenateKernel(CLDepthConcatenateKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthConcatenateKernel &operator=(CLDepthConcatenateKernel &&) = default; + /** Default destructor */ + ~CLDepthConcatenateKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: F32. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
+ * + */ + void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + int _top_bottom; + int _left_right; +}; +} +#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h index f70a0ae..8d44a4c 100644 --- a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h @@ -76,6 +76,9 @@ public: * @param[out] output Output tensor. Data type supported: same as @p input */ void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; }; } #endif /* __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h new file mode 100644 index 0000000..45a5aac --- /dev/null +++ b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__ +#define __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/Size2D.h" + +namespace arm_compute +{ +class ITensor; + +/** OpenCL kernel to perform HOG Orientation Binning */ +class CLHOGOrientationBinningKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGOrientationBinningKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default; + /** Default destructor */ + ~CLHOGOrientationBinningKernel() = default; + + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. 
Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input_magnitude; + const ICLTensor *_input_phase; + ICLTensor *_output; + Size2D _cell_size; +}; + +/** OpenCL kernel to perform HOG block normalization */ +class CLHOGBlockNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGBlockNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default; + /** Default destructor */ + ~CLHOGBlockNormalizationKernel() = default; + + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _num_cells_per_block_stride; +}; +} +#endif /* __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h new file mode 100644 index 0000000..47bd054 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__ +#define __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLHOG.h" +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/OpenCL.h" + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform HOG detector kernel using linear SVM */ +class CLHOGDetectorKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGDetectorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default; + /** Default destructor */ + ~CLHOGDetectorKernel() = default; + + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] num_detection_windows Number of detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
+ * It must be multiple of the hog->info()->block_stride() + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, + uint16_t idx_class = 0); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLDetectionWindowArray *_detection_windows; + cl::Buffer *_num_detection_windows; +}; +} + +#endif /* __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h new file mode 100644 index 0000000..fda0327 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor. + * + * @attention The second input tensor must have at least 2 dimensions (matrix) + * + */ +class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLocallyConnectedMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 First input tensor. Data types supported: F32 + * @param[in] input1 Second input tensor. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result. 
Data type supported: same as @p input0 + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h index dd96aae..6fbbe95 100644 --- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ -#define __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ +#ifndef __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ +#define __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ #include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/Types.h" @@ -70,4 +70,4 @@ private: }; } -#endif /*__ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ */ +#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h similarity index 64% rename from arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h rename to arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h index 0ad0c0d..1dc8a8b 100644 --- a/arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h +++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h @@ -21,13 +21,52 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef __ARM_COMPUTE_CLCONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ -#define __ARM_COMPUTE_CLCONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ +#ifndef __ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ +#define __ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ #include "arm_compute/core/CL/ICLKernel.h" namespace arm_compute { +class CLWeightsReshapeKernel : public ICLKernel +{ +public: + /** Constructor. + * + * @param[in] is_shared Flag to indicate whether the weights are shared or not. + */ + CLWeightsReshapeKernel(bool is_shared = false); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default; + /** Default destructor */ + ~CLWeightsReshapeKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: F16, F32 + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input + * @param[out] output The output tensor. Should be a 2D Tensor. 
Data types supported: Same as @p input + */ + void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output); + + // Inherited methods overridden: + virtual void run(const Window &window, cl::CommandQueue &queue) = 0; + +protected: + bool _is_shared; + const ICLTensor *_input; + const ICLTensor *_biases; + ICLTensor *_output; +}; + /** Interface for the weights reshape kernel used by convolution and fully connected layers. * * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. @@ -51,37 +90,25 @@ namespace arm_compute * \end{array} \right) * @f] */ -class CLConvolutionLayerWeightsReshapeKernel : public ICLKernel +class CLConvolutionLayerWeightsReshapeKernel : public CLWeightsReshapeKernel { public: /** Default constructor */ CLConvolutionLayerWeightsReshapeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvolutionLayerWeightsReshapeKernel(const CLConvolutionLayerWeightsReshapeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvolutionLayerWeightsReshapeKernel &operator=(const CLConvolutionLayerWeightsReshapeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLConvolutionLayerWeightsReshapeKernel(CLConvolutionLayerWeightsReshapeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLConvolutionLayerWeightsReshapeKernel &operator=(CLConvolutionLayerWeightsReshapeKernel &&) = default; - /** Default destructor */ - ~CLConvolutionLayerWeightsReshapeKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data types supported: F16, F32 - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM]. 
Data types supported: Same as @p input - * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input - */ - void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; +}; -private: - const ICLTensor *_input; - const ICLTensor *_biases; - ICLTensor *_output; +/** Interface for the weights reshape kernel used by locally connected layers. */ +class CLLocallyConnectedLayerWeightsReshapeKernel : public CLWeightsReshapeKernel +{ +public: + /** Default constructor */ + CLLocallyConnectedLayerWeightsReshapeKernel(); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; }; } -#endif /*__ARM_COMPUTE_CLCONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ */ +#endif /*__ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/CPP/CPPKernels.h b/arm_compute/core/CPP/CPPKernels.h index 213a9e6..1eabfa9 100644 --- a/arm_compute/core/CPP/CPPKernels.h +++ b/arm_compute/core/CPP/CPPKernels.h @@ -26,6 +26,7 @@ /* Header regrouping all the CPP kernels */ #include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" #include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" #endif /* __ARM_COMPUTE_CPPKERNELS_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h similarity index 61% rename from arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h rename to arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h index c602f06..bcb3026 100644 --- a/arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h +++ b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2016, 2017 ARM Limited. + * Copyright (c) 2017 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_NEHOGNONMAXIMASUPPRESSIONKERNEL_H__ -#define __ARM_COMPUTE_NEHOGNONMAXIMASUPPRESSIONKERNEL_H__ +#ifndef __ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H__ +#define __ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H__ #include "arm_compute/core/IArray.h" #include "arm_compute/core/IHOG.h" @@ -31,26 +31,29 @@ namespace arm_compute { -/** NEON kernel to perform in-place computation of euclidean distance based non-maxima suppression for HOG +/** CPP kernel to perform in-place computation of euclidean distance on IDetectionWindowArray * - * @note This kernel is meant to be used alongside HOG and performs a non-maxima suppression on a - * HOG detection window. + * @note This kernel is meant to be used alongside HOG or other object detection algorithms to perform a non-maxima suppression on a + * IDetectionWindowArray */ -class NEHOGNonMaximaSuppressionKernel : public INEKernel +class CPPDetectionWindowNonMaximaSuppressionKernel : public ICPPKernel { public: /** Default constructor */ - NEHOGNonMaximaSuppressionKernel(); + CPPDetectionWindowNonMaximaSuppressionKernel(); /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGNonMaximaSuppressionKernel(const NEHOGNonMaximaSuppressionKernel &) = delete; + CPPDetectionWindowNonMaximaSuppressionKernel(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGNonMaximaSuppressionKernel &operator=(const NEHOGNonMaximaSuppressionKernel &) = delete; + CPPDetectionWindowNonMaximaSuppressionKernel &operator=(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; /** Allow instances of this class to be moved */ - 
NEHOGNonMaximaSuppressionKernel(NEHOGNonMaximaSuppressionKernel &&) = default; + CPPDetectionWindowNonMaximaSuppressionKernel(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; /** Allow instances of this class to be moved */ - NEHOGNonMaximaSuppressionKernel &operator=(NEHOGNonMaximaSuppressionKernel &&) = default; + CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; /** Initialise the kernel's input, output and the euclidean minimum distance * + * @attention: If @ref CLDetectionWindowArray is passed to the kernel, the map() and unmap() methods @ref CLDetectionWindowArray must be called respectively before and after + * the run() method of @ref CPPDetectionWindowNonMaximaSuppressionKernel + * * @param[in, out] input_output Input/Output array of @ref DetectionWindow * @param[in] min_distance Radial Euclidean distance for non-maxima suppression */ @@ -66,4 +69,4 @@ private: }; } -#endif /* __ARM_COMPUTE_NEHOGNONMAXIMASUPPRESSIONKERNEL_H__ */ +#endif /* __ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H__ */ diff --git a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h index dab0192..b7a7d9f 100644 --- a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h +++ b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h @@ -24,8 +24,8 @@ #ifndef __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ #define __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" #include #include @@ -33,7 +33,7 @@ namespace arm_compute { /** CPP kernel to perform sorting and euclidean distance */ -class CPPSortEuclideanDistanceKernel : public INEKernel +class CPPSortEuclideanDistanceKernel : public ICPPKernel { public: /** Default constructor */ @@ -63,7 +63,7 @@ private: const int32_t 
*_num_corner_candidates; /**< Number of corner candidates */ float _min_distance; /**< Radial Euclidean distance */ InternalKeypoint *_in_out; /**< Source array of InternalKeypoint */ - IKeyPointArray *_output; /**< Destination array of NEKeyPointArray */ + IKeyPointArray *_output; /**< Destination array of IKeyPointArray */ }; } // namespace arm_compute diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h index c936265..3a99abb 100644 --- a/arm_compute/core/Coordinates.h +++ b/arm_compute/core/Coordinates.h @@ -37,7 +37,6 @@ namespace arm_compute class Coordinates : public Dimensions { public: -#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ /** Constructor to initialize the coordinates. * * @param[in] coords Values to initialize the dimensions. @@ -47,7 +46,6 @@ public: : Dimensions{ coords... } { } -#endif /** Allow instances of this class to be copy constructed */ constexpr Coordinates(const Coordinates &) = default; /** Allow instances of this class to be copied */ diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h index d627517..b080435 100644 --- a/arm_compute/core/Dimensions.h +++ b/arm_compute/core/Dimensions.h @@ -44,7 +44,6 @@ public: /** Number of dimensions the tensor has */ static constexpr size_t num_max_dimensions = MAX_DIMS; -#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ /** Constructor to initialize the tensor shape. * * @param[in] dims Values to initialize the dimensions. @@ -54,17 +53,19 @@ public: : _id{ { dims... 
} }, _num_dimensions{ sizeof...(dims) } { } -#endif + /** Allow instances of this class to be copy constructed */ Dimensions(const Dimensions &) = default; + /** Allow instances of this class to be copied */ Dimensions &operator=(const Dimensions &) = default; + /** Allow instances of this class to be move constructed */ Dimensions(Dimensions &&) = default; + /** Allow instances of this class to be moved */ Dimensions &operator=(Dimensions &&) = default; - /** Pure virtual destructor */ - virtual ~Dimensions() = 0; + /** Accessor to set the value of one of the dimensions. * * @param[in] dimension Dimension for which the value is set. @@ -105,17 +106,36 @@ public: return _id[dimension]; } /** Returns the effective dimensionality of the tensor */ - inline unsigned int num_dimensions() const + unsigned int num_dimensions() const { return _num_dimensions; } /** Set number of dimensions */ - inline void set_num_dimensions(size_t num_dimensions) + void set_num_dimensions(size_t num_dimensions) { _num_dimensions = num_dimensions; } + /** Collapse dimensions. + * + * @param[in] first Dimensions into which the following @p n are collapsed. + * @param[in] n Number of dimensions to collapse into @p first. + */ + void collapse(size_t n, size_t first = 0) + { + ARM_COMPUTE_ERROR_ON(first + n > _id.size()); + + // Collapse dimensions into the first + _id[first] = std::accumulate(_id.cbegin() + first, _id.cbegin() + first + n, 1, std::multiplies()); + // Shift the remaining dimensions down + std::copy(_id.begin() + first + n, _id.end(), _id.begin() + first + 1); + // Reduce the number of dimensions + _num_dimensions -= n - 1; + // Fill the now empty dimensions with zero + std::fill(_id.begin() + _num_dimensions, _id.end(), 0); + } + /** Returns a read/write iterator that points to the first element in the dimension array. */ typename std::array::iterator begin() { @@ -148,13 +168,11 @@ public: } protected: + /** Protected destructor. 
*/ + ~Dimensions() = default; + std::array _id; size_t _num_dimensions{ 0 }; }; - -template -inline Dimensions::~Dimensions() -{ -} } #endif /*__ARM_COMPUTE_DIMENSIONS_H__*/ diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h index a589501..c4c452b 100644 --- a/arm_compute/core/Error.h +++ b/arm_compute/core/Error.h @@ -48,6 +48,30 @@ */ #define ARM_COMPUTE_UNUSED(var) (void)(var) +#ifdef ARM_COMPUTE_DEBUG_ENABLED +/** Print the given message + * + * @param[in] ... Message to display + */ +#define ARM_COMPUTE_INFO(...) ::arm_compute::debug(__func__, __FILE__, __LINE__, __VA_ARGS__) // NOLINT +/** If the condition is true, the given message is printed + * + * @param[in] cond Condition to evaluate. + * @param[in] ... Message to print if cond is false. + */ +#define ARM_COMPUTE_INFO_ON_MSG(cond, ...) \ + do \ + { \ + if(cond) \ + { \ + ARM_COMPUTE_INFO(__VA_ARGS__); \ + } \ + } while(0) +#else /* ARM_COMPUTE_DEBUG_ENABLED */ +#define ARM_COMPUTE_INFO_ON_MSG(cond, ...) +#define ARM_COMPUTE_INFO(...) +#endif /* ARM_COMPUTE_DEBUG_ENABLED */ + #ifdef ARM_COMPUTE_ASSERTS_ENABLED /** If the condition is true, the given message is printed and an exception is thrown * @@ -121,6 +145,16 @@ namespace arm_compute * @param[in] ... Variable number of arguments of the message. */ [[noreturn]] void error(const char *function, const char *file, const int line, const char *msg, ...); + +/** Print a debug message + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] msg Message to display before aborting. + * @param[in] ... Variable number of arguments of the message. 
+ */ +void debug(const char *function, const char *file, const int line, const char *msg, ...); } #endif /* __ARM_COMPUTE_ERROR_H__ */ diff --git a/arm_compute/core/FixedPoint.h b/arm_compute/core/FixedPoint.h new file mode 100644 index 0000000..925b494 --- /dev/null +++ b/arm_compute/core/FixedPoint.h @@ -0,0 +1,217 @@ +/* + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_FIXEDPOINT_H__ +#define __ARM_COMPUTE_FIXEDPOINT_H__ + +#include + +namespace arm_compute +{ +using qint8_t = int8_t; /**< 8 bit fixed point scalar value */ +using qint16_t = int16_t; /**< 16 bit fixed point scalar value */ +using qint32_t = int32_t; /**< 32 bit fixed point scalar value */ + +/** 8 bit fixed point scalar saturating shift left + * + * @param[in] a First 8 bit fixed point input + * @param[in] shift Shift amount + * + * @return The result of the 8 bit fixed point shift. 
The result is saturated in case of overflow + */ +qint8_t sqshl_qs8(qint8_t a, int shift); + +/** 8 bit fixed point scalar absolute value + * + * @param[in] a 8 bit fixed point input + * + * @return The result of the 8 bit fixed point absolute value + */ +qint8_t sabs_qs8(qint8_t a); + +/** 8 bit fixed point scalar add + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point addition + */ +qint8_t sadd_qs8(qint8_t a, qint8_t b); + +/** 8 bit fixed point scalar saturating add + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point addition. The result is saturated in case of overflow + */ +qint8_t sqadd_qs8(qint8_t a, qint8_t b); + +/** 16 bit fixed point scalar saturating add + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * + * @return The result of the 16 bit fixed point addition. The result is saturated in case of overflow + */ +qint16_t sqadd_qs16(qint16_t a, qint16_t b); + +/** 8 bit fixed point scalar subtraction + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point subtraction + */ +qint8_t ssub_qs8(qint8_t a, qint8_t b); + +/** 8 bit fixed point scalar saturating subtraction + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point subtraction. 
The result is saturated in case of overflow + */ +qint8_t sqsub_qs8(qint8_t a, qint8_t b); + +/** 8 bit fixed point scalar multiply + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point multiplication. + */ +qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 8 bit fixed point scalar saturating multiply + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point multiplication. The result is saturated in case of overflow + */ +qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 8 bit fixed point scalar multiply long + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point multiplication long. The result is saturated in case of overflow + */ +qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 16 bit fixed point scalar saturating multiply +* +* @param[in] a First 16 bit fixed point input +* @param[in] b Second 16 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 16 bit fixed point multiplication. 
The result is saturated in case of overflow +*/ +qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position); + +/** 8 bit fixed point scalar inverse square root +* +* @param[in] a 8 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 8 bit fixed point inverse square root. +*/ +qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position); + +/** 8 bit fixed point scalar division +* +* @param[in] a First 8 bit fixed point input +* @param[in] b Second 8 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 8 bit fixed point division. +*/ +qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 8 bit fixed point scalar exponential +* +* @param[in] a 8 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 8 bit fixed point exponential. +*/ +qint8_t sexp_qs8(qint8_t a, int fixed_point_position); + +/** 8 bit fixed point scalar logarithm +* +* @param[in] a 8 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 8 bit fixed point logarithm. 
+*/ +qint8_t slog_qs8(qint8_t a, int fixed_point_position); + +/** Convert an 8 bit fixed point to float + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 8 bit fixed point -> float + */ +float scvt_f32_qs8(qint8_t a, int fixed_point_position); + +/** Convert a float to 8 bit fixed point + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 8 bit fixed point + */ +qint8_t scvt_qs8_f32(float a, int fixed_point_position); + +/** Convert a 16 bit fixed point to float + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 16 bit fixed point -> float + */ +float scvt_f32_qs16(qint16_t a, int fixed_point_position); + +/** Convert a float to 16 bit fixed point + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 16 bit fixed point (FIXME(review): declared return type is qint8_t but a qs16 conversion must return qint16_t; fix together with the matching definition in FixedPoint.inl) + */ +qint8_t scvt_qs16_f32(float a, int fixed_point_position); + +/** Scalar saturating move and narrow. + * + * @param[in] a Input to convert to 8 bit fixed point + * + * @return The narrowing conversion to 8 bit + */ +qint8_t sqmovn_qs16(qint16_t a); +} +#include "arm_compute/core/FixedPoint.inl" +#endif /* __ARM_COMPUTE_FIXEDPOINT_H__ */ diff --git a/arm_compute/core/FixedPoint.inl b/arm_compute/core/FixedPoint.inl new file mode 100644 index 0000000..4263a6f --- /dev/null +++ b/arm_compute/core/FixedPoint.inl @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include +#include + +namespace +{ +template +inline TpSat saturate_convert(TpIn a) +{ + if(a > std::numeric_limits::max()) + { + a = std::numeric_limits::max(); + } + if(a < std::numeric_limits::min()) + { + a = std::numeric_limits::min(); + } + return static_cast(a); +} +} // namespace + +namespace arm_compute +{ +inline qint8_t sqshl_qs8(qint8_t a, int shift) +{ + qint16_t tmp = static_cast(a) << shift; + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert(tmp); +} + +inline qint8_t sabs_qs8(qint8_t a) +{ + return a & 0x7F; +} + +inline qint8_t sadd_qs8(qint8_t a, qint8_t b) +{ + return a + b; +} + +inline qint8_t sqadd_qs8(qint8_t a, qint8_t b) +{ + // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow + qint16_t tmp = (static_cast(a) + static_cast(b)); + + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert(tmp); +} + +inline qint16_t sqadd_qs16(qint16_t a, qint16_t b) +{ + // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow + qint32_t tmp = (static_cast(a) + static_cast(b)); + + // Saturate the result in case of overflow and cast to qint16_t + return saturate_convert(tmp); +} + +inline qint8_t ssub_qs8(qint8_t a, qint8_t b) +{ + return a - b; +} + +inline qint8_t sqsub_qs8(qint8_t a, qint8_t b) +{ + // We need to store the temporary result in uint16_t otherwise we cannot evaluate the overflow + qint16_t tmp = static_cast(a) - static_cast(b); + + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert(tmp); +} + +inline qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + const qint16_t round_up_const = (1 << (fixed_point_position - 1)); + + qint16_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return static_cast(tmp >> fixed_point_position); +} + +inline qint8_t sqmul_qs8(qint8_t a, qint8_t b, int 
fixed_point_position) +{ + const qint16_t round_up_const = (1 << (fixed_point_position - 1)); + + qint16_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return saturate_convert(tmp >> fixed_point_position); +} + +inline qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position) +{ + const qint32_t round_up_const = (1 << (fixed_point_position - 1)); + + qint32_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return saturate_convert(tmp >> fixed_point_position); +} + +inline qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + const qint16_t round_up_const = (1 << (fixed_point_position - 1)); + + qint16_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return tmp >> fixed_point_position; +} + +inline qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position) +{ + qint8_t shift = 8 - (fixed_point_position + (__builtin_clz(a) - 24)); + + qint8_t const_three = (3 << fixed_point_position); + qint8_t temp = shift < 0 ? (a << -shift) : (a >> shift); + qint8_t x2 = temp; + + // We need three iterations to find the result + for(int i = 0; i < 3; i++) + { + qint8_t three_minus_dx = ssub_qs8(const_three, smul_qs8(temp, smul_qs8(x2, x2, fixed_point_position), fixed_point_position)); + x2 = (smul_qs8(x2, three_minus_dx, fixed_point_position) >> 1); + } + + temp = shift < 0 ? 
(x2 << (-shift >> 1)) : (x2 >> (shift >> 1)); + + return temp; +} + +inline qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + qint16_t temp = a << fixed_point_position; + return (qint8_t)(temp / b); +} + +inline qint8_t sqexp_qs8(qint8_t a, int fixed_point_position) +{ + // Constants + qint8_t const_one = (1 << fixed_point_position); + qint8_t ln2 = ((0x58 >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t inv_ln2 = (((0x38 >> (6 - fixed_point_position)) + 1) >> 1) | const_one; + qint8_t A = ((0x7F >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t B = ((0x3F >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t C = ((0x16 >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t D = ((0x05 >> (6 - fixed_point_position)) + 1) >> 1; + + // Polynomial expansion + int dec_a = (sqmul_qs8(a, inv_ln2, fixed_point_position) >> fixed_point_position); + qint8_t alpha = sabs_qs8(sqsub_qs8(a, sqmul_qs8(ln2, sqshl_qs8(dec_a, fixed_point_position), fixed_point_position))); + qint8_t sum = sqadd_qs8(sqmul_qs8(alpha, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), A); + sum = sqmul_qs8(alpha, sum, fixed_point_position); + sum = sqadd_qs8(sum, const_one); + + return (dec_a < 0) ? 
(sum >> -dec_a) : sqshl_qs8(sum, dec_a); +} + +inline qint8_t slog_qs8(qint8_t a, int fixed_point_position) +{ + // Constants + qint8_t const_one = (1 << fixed_point_position); + qint8_t ln2 = (0x58 >> (7 - fixed_point_position)); + qint8_t A = (0x5C >> (7 - fixed_point_position - 1)); + qint8_t B = -(0x56 >> (7 - fixed_point_position)); + qint8_t C = (0x29 >> (7 - fixed_point_position)); + qint8_t D = -(0x0A >> (7 - fixed_point_position)); + + if((const_one == a) || (a < 0)) + { + return 0; + } + else if(a < const_one) + { + return -slog_qs8(sdiv_qs8(const_one, a, fixed_point_position), fixed_point_position); + } + + // Remove even powers of 2 + qint8_t shift_val = 31 - __builtin_clz(a >> fixed_point_position); + a >>= shift_val; + a = ssub_qs8(a, const_one); + + // Polynomial expansion + auto sum = sqadd_qs8(sqmul_qs8(a, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), A); + sum = sqmul_qs8(a, sum, fixed_point_position); + + return smul_qs8(sadd_qs8(sum, shift_val << fixed_point_position), ln2, fixed_point_position); +} + +inline float scvt_f32_qs8(qint8_t a, int fixed_point_position) +{ + return static_cast(a) / (1 << fixed_point_position); +} + +inline qint8_t scvt_qs8_f32(float a, int fixed_point_position) +{ + // round_nearest_integer(a * 2^(fixed_point_position)) + return static_cast(static_cast(a) * (1 << fixed_point_position) + 0.5f); +} + +inline float scvt_f32_qs16(qint16_t a, int fixed_point_position) +{ + return static_cast(a) / (1 << fixed_point_position); +} + +inline qint8_t scvt_qs16_f32(float a, int fixed_point_position) +{ + // round_nearest_integer(a * 2^(fixed_point_position)) + return static_cast(static_cast(a) * (1 << fixed_point_position) + 0.5f); +} + +inline qint8_t sqmovn_qs16(qint16_t a) +{ + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert(a); +} +} diff --git a/arm_compute/core/Helpers.h 
b/arm_compute/core/Helpers.h index a84ce2c..07318ea 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ -24,13 +24,14 @@ #ifndef __ARM_COMPUTE_HELPERS_H__ #define __ARM_COMPUTE_HELPERS_H__ +#include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Steps.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" #include "arm_compute/core/Window.h" - #include #include #include @@ -43,11 +44,10 @@ namespace arm_compute { class IKernel; class ITensor; -class TensorInfo; +class ITensorInfo; namespace cpp14 { -#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ template struct _Unique_if { @@ -84,12 +84,43 @@ make_unique(size_t n) template typename _Unique_if::_Known_bound make_unique(Args &&...) = delete; -#endif /* DOXYGEN_SKIP_THIS */ } + +template +struct enable_bitwise_ops +{ + static constexpr bool value = false; +}; + +template +typename std::enable_if::value, T>::type operator&(T lhs, T rhs) +{ + using underlying_type = typename std::underlying_type::type; + return static_cast(static_cast(lhs) & static_cast(rhs)); } -namespace +namespace traits +{ +/** Check if a type T is contained in a tuple Tuple of types */ +template +struct is_contained; + +template +struct is_contained> : std::false_type +{ +}; + +template +struct is_contained> : std::true_type { +}; + +template +struct is_contained> : is_contained> +{ +}; +} + /** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between * the real coordinates and the smallest following integer coordinates. * @@ -215,10 +246,7 @@ inline I foldl(F &&func, I &&initial, T &&value, Vs &&... 
values) { return foldl(std::forward(func), func(std::forward(initial), std::forward(value)), std::forward(values)...); } -} -namespace arm_compute -{ /** Iterator updated by @ref execute_window_loop for each window element */ class Iterator { @@ -334,7 +362,7 @@ bool update_window_and_padding(Window &win, Ts &&... patterns) * * @return The maximum window the kernel can be executed on. */ -Window calculate_max_window(const TensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); +Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); /** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting * @@ -345,7 +373,17 @@ Window calculate_max_window(const TensorInfo &info, const Steps &steps = Steps() * * @return The maximum window the kernel can be executed on. */ -Window calculate_max_window_horizontal(const TensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); +Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] border_size (Optional) Border size. The border region will be included in the window. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize()); /** Intersect multiple valid regions. 
* @@ -386,7 +424,7 @@ ValidRegion intersect_valid_regions(Ts &&... regions) * calculated based on the tensor shape and the strides of lower dimensions. */ template -inline Strides compute_strides(const TensorInfo &info, T stride_x, Ts &&... fixed_strides) +inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides) { const TensorShape &shape = info.tensor_shape(); @@ -408,11 +446,62 @@ inline Strides compute_strides(const TensorInfo &info, T stride_x, Ts &&... fixe * @return Strides object based on element size and tensor shape. */ template -inline Strides compute_strides(const TensorInfo &info) +inline Strides compute_strides(const ITensorInfo &info) { return compute_strides(info, info.element_size()); } -} + +/* Auto initialize the tensor info (shape, number of channels, data type and fixed point position) if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * @param[in] num_channels New number of channels. + * @param[in] data_type New data type + * @param[in] fixed_point_position New fixed point position + * + * @return True if the tensor info has been initialized + */ +bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, int fixed_point_position); + +/* Set the shape to the specified value if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * + * @return True if the shape has been changed. + */ +bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape); + +/* Set the format, data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] format New format. + * + * @return True if the format has been changed. 
+ */ +bool set_format_if_unknown(ITensorInfo &info, Format format); + +/* Set the data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] data_type New data type. + * + * @return True if the data type has been changed. + */ +bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type); + +/* Set the fixed point position to the specified value if + * the current fixed point position is 0 and the data type is QS8 or QS16 + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] fixed_point_position New fixed point position + * + * @return True if the fixed point position has been changed. + */ +bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position); +} // namespace arm_compute #include "arm_compute/core/Helpers.inl" #endif /*__ARM_COMPUTE_HELPERS_H__ */ diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl index 4aa7acf..f885810 100644 --- a/arm_compute/core/Helpers.inl +++ b/arm_compute/core/Helpers.inl @@ -27,7 +27,7 @@ #include #include -namespace +namespace arm_compute { inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy) { @@ -122,11 +122,7 @@ inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stri // Return average return sum / (x_elements * y_elements); } -} -#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ -namespace arm_compute -{ template struct IncrementIterators { @@ -143,6 +139,11 @@ struct IncrementIterators it.increment(dimension); // End of recursion } + + static void unroll() + { + // End of recursion + } }; template @@ -189,7 +190,7 @@ inline Iterator::Iterator(const ITensor *tensor, const Window &win) : Iterator() { ARM_COMPUTE_ERROR_ON(tensor == nullptr); - const TensorInfo *info = tensor->info(); + const 
ITensorInfo *info = tensor->info(); ARM_COMPUTE_ERROR_ON(info == nullptr); const Strides &strides = info->strides_in_bytes(); @@ -244,5 +245,62 @@ inline void Iterator::reset(const size_t dimension) _dims[n]._dim_start = _dims[dimension]._dim_start; } } + +inline bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, int fixed_point_position) +{ + if(info.tensor_shape().total_size() == 0) + { + info.set_data_type(data_type); + info.set_tensor_shape(shape); + info.set_num_channels(num_channels); + info.set_fixed_point_position(fixed_point_position); + return true; + } + + return false; +} + +inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) +{ + if(info.tensor_shape().total_size() == 0) + { + info.set_tensor_shape(shape); + return true; + } + + return false; +} + +inline bool set_format_if_unknown(ITensorInfo &info, Format format) +{ + if(info.data_type() == DataType::UNKNOWN) + { + info.set_format(format); + return true; + } + + return false; +} + +inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) +{ + if(info.data_type() == DataType::UNKNOWN) + { + info.set_data_type(data_type); + return true; + } + + return false; +} + +inline bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position) +{ + if(info.fixed_point_position() == 0 && (info.data_type() == DataType::QS8 || info.data_type() == DataType::QS16)) + { + info.set_fixed_point_position(fixed_point_position); + return true; + } + + return false; } -#endif /* DOXYGEN_SKIP_THIS */ +} // namespace arm_compute diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h index 3b905ed..cf7490d 100644 --- a/arm_compute/core/IAccessWindow.h +++ b/arm_compute/core/IAccessWindow.h @@ -33,7 +33,7 @@ namespace arm_compute { class Window; -class TensorInfo; +class ITensorInfo; /** Decrease @p required in steps of @p step until it's less than @p available. 
* @@ -112,7 +112,7 @@ public: * @param[in] width Number of elements that are accessed in X direction. * @param[in] height Number of elements that are accessed in Y direction. */ - AccessWindowRectangle(TensorInfo *info, int x, int y, int width, int height) + AccessWindowRectangle(ITensorInfo *info, int x, int y, int width, int height) : AccessWindowRectangle(info, x, y, width, height, 1.f, 1.f) { } @@ -129,7 +129,7 @@ public: * @param[in] scale_x Ratio along the X direction between the window used by the execute_window_loop and the rectangular access pattern defined * @param[in] scale_y Ratio along the Y direction between the window used by the execute_window_loop and the rectangular access pattern defined */ - AccessWindowRectangle(TensorInfo *info, int x, int y, int width, int height, float scale_x, float scale_y) + AccessWindowRectangle(ITensorInfo *info, int x, int y, int width, int height, float scale_x, float scale_y) : _info(info), _x(x), _y(y), _width(width), _height(height), _scale_x(scale_x), _scale_y(scale_y) { ARM_COMPUTE_ERROR_ON(width < 0); @@ -171,13 +171,13 @@ public: bool update_padding_if_needed(const Window &window) const override; protected: - TensorInfo *_info; - int _x; - int _y; - int _width; - int _height; - float _scale_x; - float _scale_y; + ITensorInfo *_info; + int _x; + int _y; + int _width; + int _height; + float _scale_x; + float _scale_y; }; /** Implementation of a column access pattern. */ @@ -193,7 +193,7 @@ public: * @param[in] height Number of elements that are accessed in Y direction. 
* @param[in] scale_y Ratio along the Y direction between the window used by the execute_window_loop and the rectangular access pattern defined */ - AccessWindowVertical(TensorInfo *info, int y, int height, float scale_y = 1.f) + AccessWindowVertical(ITensorInfo *info, int y, int height, float scale_y = 1.f) : AccessWindowRectangle(info, 0, y, 1, height, 1.f, scale_y) { ARM_COMPUTE_ERROR_ON(height < 0); @@ -214,7 +214,7 @@ public: * @param[in] width Number of elements that are accessed in X direction. * @param[in] scale_x Ratio along the X direction between the window used by the execute_window_loop and the rectangular access pattern defined */ - AccessWindowHorizontal(TensorInfo *info, int x, int width, float scale_x = 1.f) + AccessWindowHorizontal(ITensorInfo *info, int x, int width, float scale_x = 1.f) : AccessWindowRectangle(info, x, 0, width, 1, scale_x, 1.f) { ARM_COMPUTE_ERROR_ON(width < 0); diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h index 55464a7..202b50a 100644 --- a/arm_compute/core/ITensor.h +++ b/arm_compute/core/ITensor.h @@ -40,12 +40,12 @@ public: * * @return A pointer to the tensor's metadata. */ - virtual TensorInfo *info() const = 0; + virtual ITensorInfo *info() const = 0; /** Interface to be implemented by the child class to return the tensor's metadata * * @return A pointer to the tensor's metadata. */ - virtual TensorInfo *info() = 0; + virtual ITensorInfo *info() = 0; /** Default virtual destructor */ virtual ~ITensor() = default; /** Interface to be implemented by the child class to return a pointer to CPU memory diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h new file mode 100644 index 0000000..bb3ac6e --- /dev/null +++ b/arm_compute/core/ITensorInfo.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ITENSORINFO_H__ +#define __ARM_COMPUTE_ITENSORINFO_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Strides.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" + +#include + +namespace arm_compute +{ +/** Store the tensor's metadata */ +class ITensorInfo +{ +public: + /** Default virtual destructor */ + virtual ~ITensorInfo() = default; + /** Set the data type to the specified value. + * + * @warning This resets the format to UNKNOWN. + * + * @param[in] data_type The new data type. + */ + virtual void set_data_type(DataType data_type) = 0; + /** Set the number of channels to the specified value. + * + * @warning This resets the format to UNKNOWN. + * + * @param[in] num_channels New number of channels. 
+ */ + virtual void set_num_channels(int num_channels) = 0; + /** Set the format of an already initialized tensor. + * + * @note If the data type has already been configured (i.e. not UNKNOWN) it + * must match the new format. If data type hasn't been configured it will + * be based on the format. + * + * @param[in] format Single-plane format of the tensor. + */ + virtual void set_format(Format format) = 0; + /** Set the shape of an already initialized tensor. + * + * @warning Changing the shape requires to recompute the strides and is + * therefore only possible if the tensor hasn't been allocated yet. + * + * @param[in] shape New tensor shape. + */ + virtual void set_tensor_shape(TensorShape shape) = 0; + /** Set the fixed point position to the specified value + * + * @warning The fixed point position must be set once the data type has been configured + * + * @param[in] fixed_point_position The new fixed point position + */ + virtual void set_fixed_point_position(int fixed_point_position) = 0; + /** Update the offset to the first element and the strides to automatically computed values. + * + * @note The padding used by this method is really conservative so that the tensor can be used for most functions. + * + * @return True if the strides or the offset to the first element have changed. + */ + virtual bool auto_padding() = 0; + /** Update the offset to the first element, the strides and the total size. + * + * @note This function can only increase the offset, strides and total size. + * + * @param[in] padding Padding around the XY plane in number of elements. + * + * @return True if the strides, offset and total size have changed. 
+ */ + virtual bool extend_padding(const PaddingSize &padding) = 0; + /** Return the size of the requested dimension + * + * @param[in] index Index of the dimension + * + * @return Dimension of the requested dimension + */ + virtual size_t dimension(size_t index) const = 0; + /** The strides in bytes for accessing each dimension of the tensor + * + * @return Strides in bytes for each tensor dimension + */ + virtual const Strides &strides_in_bytes() const = 0; + /** The offset from the beginning of the memory allocation to the first element of the tensor. + * This can be used to access efficiently elements in a 2D tensor + * + * @return The offset in bytes to access the first element of the tensor. + */ + virtual size_t offset_first_element_in_bytes() const = 0; + /** The offset in bytes from the beginning of the memory allocation to access the element at position (x, y, z ...) + * + * @param[in] pos Vector with the coordinates of the element to access. + * The size of this vector must be equal to the number of dimensions of the tensor + * + * @return Offset in bytes from the beginning of the memory allocation to access the element (x, y, z, ...) 
+ */ + virtual size_t offset_element_in_bytes(const Coordinates &pos) const = 0; + /** Fixed point position used when the tensor data type is QS8 or QS16 + * + * @return The fixed point position that expresses the number of bits for the fractional part of the number + */ + virtual int fixed_point_position() const = 0; + /** Element size in bytes calculated as data_size() * num_channels() + * + * @return The size of one element in bytes + */ + virtual size_t element_size() const = 0; + /** The number of dimensions of the tensor (rank) + * + * @return The number of dimensions of the tensor (rank) + */ + virtual size_t num_dimensions() const = 0; + /** The number of channels for each tensor element + * + * @return The number of channels for each tensor element + */ + virtual size_t num_channels() const = 0; + /** Size for each dimension of the tensor + * + * @return A vector with the size for each dimension of the tensor + */ + virtual const TensorShape &tensor_shape() const = 0; + /** Data type used for each element of the tensor + * + * @return Tensor data type + */ + virtual DataType data_type() const = 0; + /** Colour format of the image + * + * @return Colour format of the image + */ + virtual Format format() const = 0; + /** Returns the total size of the tensor in bytes. + * + * @return Total size of the tensor in bytes. + */ + virtual size_t total_size() const = 0; + /** Padding of tensor. + * + * @return Padding. + */ + virtual PaddingSize padding() const = 0; + /** Checks if the tensor has been allocated with padding or not. + * + * @return True if padding is allocated in the tensor, otherwise false. + */ + virtual bool has_padding() const = 0; + /** Flag indicating whether the size of the tensor can be changed. + * + * @return True if the tensor size can be changed. + */ + virtual bool is_resizable() const = 0; + /** Set the flag whether the tensor size can be changed. + * + * @param[in] is_resizable Flag that marks the tensor if it can be changed or not. 
+ */ + virtual void set_is_resizable(bool is_resizable) = 0; + /** Valid region of the tensor. All elements in the valid region have defined values, i.e. are not undefined. + * + * @return The valid region. + */ + virtual ValidRegion valid_region() const = 0; + /** Set the valid region of the tensor. + * + * @param[in] valid_region Valid region to set. + */ + virtual void set_valid_region(ValidRegion valid_region) = 0; +}; +} +#endif /*__ARM_COMPUTE_ITENSORINFO_H__ */ diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h new file mode 100644 index 0000000..fb71261 --- /dev/null +++ b/arm_compute/core/NEON/NEFixedPoint.h @@ -0,0 +1,686 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__ +#define __ARM_COMPUTE_NEFIXEDPOINT_H__ + +#include "arm_compute/core/FixedPoint.h" + +#include + +namespace arm_compute +{ +using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */ +using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */ +using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */ +using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */ +using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */ +using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */ +using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */ +using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */ +using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */ +using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */ +using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */ +using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */ +using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */ +using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */ +using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */ +using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */ + +/** Get the lower half of a 16 elements vector + * + * @param[in] a vector of 16 elements + * + * @return 8 bit fixed point vector (8 elements) + */ +qint8x8_t vget_low_qs8(qint8x16_t a); + +/** Get the higher half of a 16 elements vector + * + * @param[in] a vector of 16 elements + * + * @return 8 bit fixed point vector (8 elements) + */ +qint8x8_t vget_high_qs8(qint8x16_t a); + +/** Load a single 8 bit fixed point vector from memory (8 elements) + * + * @param[in] addr Memory 
address of the 8 bit fixed point vector to load + * + * @return 8 bit fixed point vector (8 elements) + */ +qint8x8_t vld1_qs8(const qint8_t *addr); + +/** Load a single 8 bit fixed point vector from memory (16 elements) + * + * @param[in] addr Memory address of the 8 bit fixed point vector to load + * + * @return 8 bit fixed point vector (16 elements) + */ +qint8x16_t vld1q_qs8(const qint8_t *addr); + +/** Load a single 16 bit fixed point vector from memory (4 elements) + * + * @param[in] addr Memory address of the 16 bit fixed point vector to load + * + * @return 16 bit fixed point vector (4 elements) + */ +qint16x4_t vld1_qs16(const qint16_t *addr); + +/** Load a single 16 bit fixed point vector from memory (8 elements) + * + * @param[in] addr Memory address of the 16 bit fixed point vector to load + * + * @return 16 bit fixed point vector (8 elements) + */ +qint16x8_t vld1q_qs16(const qint16_t *addr); + +/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements) + * + * @param[in] addr Memory address of the 8 bit fixed point scalar value to load + * + * @return 8 bit fixed point vector (8 elements) + */ +qint8x8_t vld1_dup_qs8(const qint8_t *addr); + +/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements) + * + * @param[in] addr Memory address of the 8 bit fixed point scalar value to load + * + * @return 8 bit fixed point vector (16 elements) + */ +qint8x16_t vld1q_dup_qs8(const qint8_t *addr); + +/** Store a single 8 bit fixed point vector to memory (8 elements) + * + * @param[in] addr Memory address where the 8 bit fixed point vector should be stored + * @param[in] b 8 bit fixed point vector to store + * + */ +void vst1_qs8(qint8_t *addr, qint8x8_t b); + +/** Store a single 8 bit fixed point vector to memory (16 elements) + * + * @param[in] addr Memory address where the 8 bit fixed point vector should be stored + * @param[in] b 8 bit fixed point vector to store + * + */ +void vst1q_qs8(qint8_t 
*addr, qint8x16_t b); + +/** Store a single 16 bit fixed point vector to memory (4 elements) + * + * @param[in] addr Memory address where the 16 bit fixed point vector should be stored + * @param[in] b 16 bit fixed point vector to store + * + */ +void vst1_qs16(qint16_t *addr, qint16x4_t b); + +/** Store a single 16 bit fixed point vector to memory (8 elements) + * + * @param[in] addr Memory address where the 16 bit fixed point vector should be stored + * @param[in] b 16 bit fixed point vector to store + * + */ +void vst1q_qs16(qint16_t *addr, qint16x8_t b); + +/** 16 bit fixed point vector saturating narrow (8 elements) + * + * @param[in] a 16 bit fixed point vector to convert + * + * @return 8 bit fixed point vector + */ +qint8x8_t vqmovn_q16(qint16x8_t a); + +/** 8 bit fixed point vector duplicate (8 elements) + * + * @param[in] a 8 bit fixed point to duplicate + * + * @return The result of the vector duplication + */ +qint8x8_t vdup_n_qs8(qint8_t a); + +/** 8 bit fixed point vector duplicate (16 elements) + * + * @param[in] a 8 bit fixed point to duplicate + * + * @return The result of the vector duplication + */ +qint8x16_t vdupq_n_qs8(qint8_t a); + +/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements) + * + * @param[in] a Floating point value to duplicate and convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the vector duplication + */ +qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position); + +/** 16 bit fixed point vector duplicate (8 elements) + * + * @param[in] a 16 bit fixed point to duplicate + * + * @return The result of the vector duplication + */ +qint16x8_t vdupq_n_qs16(qint16x8_t a); + +/** Absolute value of 8 bit fixed point vector (8 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector absolute value + */ +qint8x8_t vabs_qs8(qint8x8_t a); + +/** 
Absolute value of 8 bit fixed point vector (16 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector absolute value + */ +qint8x16_t vabsq_qs8(qint8x16_t a); + +/** Saturating absolute value of 8 bit fixed point vector (8 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector absolute value + */ +qint8x8_t vqabs_qs8(qint8x8_t a); + +/** Saturating absolute value of 8 bit fixed point vector (16 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector absolute value + */ +qint8x16_t vqabsq_qs8(qint8x16_t a); + +/** 8 bit fixed point vector max (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector max operation + */ +qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector max (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector max operation + */ +qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector pairwise max (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector pairwise max operation + */ +qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector min (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector min operation + */ +qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector min (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * 
@param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector min operation + */ +qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector pairwise min (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector pairwise min operation + */ +qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector add (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector addition + */ +qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector add (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector addition + */ +qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector saturating add (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow + */ +qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector saturating add (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow + */ +qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b); + +/** 16 bit fixed point vector saturating add (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector addition. 
The result is saturated in case of overflow + */ +qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b); + +/** 16 bit fixed point vector saturating add (8 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow + */ +qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b); + +/** 8 bit fixed point vector saturating pairwise add (8 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow + */ +int16x4_t vpaddl_qs8(qint8x8_t a); + +/** 8 bit fixed point vector subtraction (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector subtraction + */ +qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector subtraction (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector subtraction + */ +qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector saturating subtraction (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow + */ +qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector saturating subtraction (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector subtraction. 
The result is saturated in case of overflow + */ +qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector multiply (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. + */ +qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** 8 bit fixed point vector multiply (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. + */ +qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow + */ +qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. 
The result is saturated in case of overflow + */ +qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position); + +/** 8 bit fixed point vector long multiply (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point long vector multiplication. + */ +qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). + * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate + */ +qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). 
+ * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate + */ +qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). + * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow + */ +qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). 
+ * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate.The result is saturated in case of overflow + */ +qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position); + +/** 8 bit fixed point vector multiply-accumulate long (8 elements). + * This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements + * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate long + */ +qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector. + * This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 
8 elements + * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate long + */ +qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements + * + * @param[in] a Float input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 8 bit fixed point + */ +qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position); + +/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements + * + * @param[in] a Float input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 8 bit fixed point + */ +qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position); + +/** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements + * + * @param[in] a 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 8 bit fixed point -> float32x2x4 + */ +float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position); + +/** Convert a 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements + * + * @param[in] a 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position 
that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 8 bit fixed point -> float32x4x4 + */ +float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit reciprocal (1/a). + */ +qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit reciprocal (1/a). + */ +qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position); + +/** Division fixed point 8bit (8 elements) + * + * @param[in] a First 8bit fixed point input vector + * @param[in] b Second 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The quotient and remainder number in fixed point format. + */ +qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position); + +/** Division fixed point 8bit (16 elements) + * + * @param[in] a First 8bit fixed point input vector + * @param[in] b Second 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The quotient and remainder number in 8bit fixed point format. + */ +qint8x16_t vdivq_qs8(qint8x16_t a, int8x16_t b, int fixed_point_position); + +/** Perform a 4th degree polynomial approximation.
(8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit taylor approximation. + */ +template +qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position); + +/** Perform a 4th degree polynomial approximation. (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit taylor approximation. + */ +template +qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate saturating exponential fixed point 8bit (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit saturating exponential + */ +qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate saturating exponential fixed point 8bit (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit saturating exponential + */ +qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate logarithm fixed point 16bit (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit logarithm. 
+ */ +qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate logarithm fixed point 8bit (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit logarithm. + */ +qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit inverse sqrt. + */ +qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit inverse sqrt. + */ +qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit inverse sqrt. + */ +qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit inverse sqrt.
+ */ +qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate hyperbolic tangent for fixed point 8bit (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The calculated Hyperbolic Tangent. + */ +qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate hyperbolic tangent for fixed point 8bit (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The calculated Hyperbolic Tangent. + */ +qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate saturating n power for fixed point 8bit (16 elements). + * + * pow(a,b) = e^(b*log(a)) + * + * @param[in] a 8bit fixed point input vector + * @param[in] b 8bit fixed point power vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit power. + */ +qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position); +} +#include "arm_compute/core/NEON/NEFixedPoint.inl" +#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */ diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl new file mode 100644 index 0000000..6db344d --- /dev/null +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -0,0 +1,1018 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +namespace arm_compute +{ +/**< Exponent polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements */ +const std::array exp_tab_qs8 = +{ + { + vdup_n_s8(0x7F), // 0.9978546 + vdup_n_s8(0x3F), // 0.4994721 + vdup_n_s8(0x16), // 0.1763723 + vdup_n_s8(0x05), // 0.0435108 + } +}; + +/**< Exponent polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements */ +const std::array exp_tabq_qs8 = +{ + { + vdupq_n_s8(0x7F), // 0.9978546 + vdupq_n_s8(0x3F), // 0.4994721 + vdupq_n_s8(0x16), // 0.1763723 + vdupq_n_s8(0x05), // 0.0435108 + } +}; + +/**< Logarithm polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +const std::array log_tab_qs8 = +{ + { + vdup_n_s8(0x5C), // 1.4384189 + vdup_n_s8(-0x56), // -0.6771900 + vdup_n_s8(0x29), // 0.3218538 + vdup_n_s8(-0x0A), // -0.0832229 + } +}; + +/**< Logarithm polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +const std::array log_tabq_qs8 = +{ + { + vdupq_n_s8(0x5C), // 1.4384189 + vdupq_n_s8(-0x56), // -0.6771900 + vdupq_n_s8(0x29), // 0.3218538 + vdupq_n_s8(-0x0A), // -0.0832229 + } +}; + +inline qint8x8_t vget_low_qs8(qint8x16_t a) +{ + return vget_low_s8(a); +} + +inline qint8x8_t vget_high_qs8(qint8x16_t a) +{ + return vget_high_s8(a); +} + +inline qint8x8_t vld1_qs8(const qint8_t *addr) +{ + return vld1_s8(addr); +} + +inline qint8x16_t vld1q_qs8(const qint8_t *addr) +{ + return vld1q_s8(addr); +} + +inline qint16x4_t vld1_qs16(const qint16_t *addr) +{ + return vld1_s16(addr); +} + +inline qint16x8_t vld1q_qs16(const qint16_t *addr) +{ + return vld1q_s16(addr); +} + +inline qint8x8_t vld1_dup_qs8(const qint8_t *addr) +{ + return vld1_dup_s8(addr); +} + +inline qint8x16_t vld1q_dup_qs8(const qint8_t *addr) +{ + return vld1q_dup_s8(addr); +} + +inline void 
vst1_qs8(qint8_t *addr, qint8x8_t b) +{ + vst1_s8(addr, b); +} + +inline void vst1q_qs8(qint8_t *addr, qint8x16_t b) +{ + vst1q_s8(addr, b); +} + +inline void vst1_qs16(qint16_t *addr, qint16x4_t b) +{ + vst1_s16(addr, b); +} + +inline void vst1q_qs16(qint16_t *addr, qint16x8_t b) +{ + vst1q_s16(addr, b); +} + +inline qint8x8_t vqmovn_qs16(qint16x8_t a) +{ + return vqmovn_s16(a); +} + +inline qint8x8_t vdup_n_qs8(qint8_t a) +{ + return vdup_n_s8(a); +} + +inline qint8x16_t vdupq_n_qs8(qint8_t a) +{ + return vdupq_n_s8(a); +} + +inline qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position) +{ + float32x4x4_t res = + { + { + vdupq_n_f32(a), + vdupq_n_f32(a), + vdupq_n_f32(a), + vdupq_n_f32(a), + } + }; + return vcvtq_qs8_f32(res, fixed_point_position); +} + +inline qint16x8_t vdupq_n_qs16(qint16_t a) +{ + return vdupq_n_s16(a); +} + +inline qint8x8_t vabs_qs8(qint8x8_t a) +{ + return vabs_s8(a); +} + +inline qint8x16_t vabsq_qs8(qint8x16_t a) +{ + return vabsq_s8(a); +} + +inline qint8x8_t vqabs_qs8(qint8x8_t a) +{ + return vqabs_s8(a); +} + +inline qint8x16_t vqabsq_qs8(qint8x16_t a) +{ + return vqabsq_s8(a); +} + +inline qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b) +{ + return vmax_s8(a, b); +} + +inline qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vmaxq_s8(a, b); +} + +inline qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b) +{ + return vpmax_s8(a, b); +} + +inline qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b) +{ + return vmin_s8(a, b); +} + +inline qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vminq_s8(a, b); +} + +inline qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b) +{ + return vpmin_s8(a, b); +} + +inline qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b) +{ + return vadd_s8(a, b); +} + +inline qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vaddq_s8(a, b); +} + +inline qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b) +{ + return vqadd_s8(a, b); +} + +inline qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b) +{ + return 
vqaddq_s8(a, b); +} + +inline qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b) +{ + return vqadd_s16(a, b); +} + +inline qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b) +{ + return vqaddq_s16(a, b); +} + +inline int16x4_t vpaddl_qs8(qint8x8_t a) +{ + return vpaddl_s8(a); +} + +inline qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b) +{ + return vsub_s8(a, b); +} + +inline qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vsubq_s8(a, b); +} + +inline qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b) +{ + return vqsub_s8(a, b); +} + +inline qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vqsubq_s8(a, b); +} + +inline qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary result with a constant used to round up the result + qint16x8_t res = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + res = vmlal_s8(res, a, b); + + // Shift right by fixed_point_position + res = vshlq_s16(res, fixed_point_position_s16); + + // Convert back to qint8 + return vmovn_s16(res); +} + +inline qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t res0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t res1 = res0; + + // Vector multiply-accumulate long + res0 = vmlal_s8(res0, vget_low_s8(a), vget_low_s8(b)); + res1 = vmlal_s8(res1, vget_high_s8(a), vget_high_s8(b)); + + // Shift right by fixed_point_position + res0 = vshlq_s16(res0, fixed_point_position_s16); + res1 = vshlq_s16(res1, fixed_point_position_s16); + + // Convert back to qint8 + return vcombine_s8(vmovn_s16(res0), vmovn_s16(res1)); +} + +inline qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + const 
int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary result with a constant used to round up the result + qint16x8_t res = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + res = vmlal_s8(res, a, b); + + // Shift right by fixed_point_position + res = vqshlq_s16(res, fixed_point_position_s16); + + // Convert back to qint8 and saturate + return vqmovn_s16(res); +} + +inline qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t res0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t res1 = res0; + + // Vector multiply-accumulate long + res0 = vmlal_s8(res0, vget_low_s8(a), vget_low_s8(b)); + res1 = vmlal_s8(res1, vget_high_s8(a), vget_high_s8(b)); + + // Shift right by fixed_point_position + res0 = vqshlq_s16(res0, fixed_point_position_s16); + res1 = vqshlq_s16(res1, fixed_point_position_s16); + + // Convert back to qint8 and saturate + return vcombine_s8(vqmovn_s16(res0), vqmovn_s16(res1)); +} + +inline qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + qint16x8_t res = vmull_s8(a, b); + + return vqrshlq_s16(res, fixed_point_position_s16); +} + +inline qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vshlq_s16(tmp, fixed_point_position_s16); + + // Convert back to qint8 and 
accumulate + return vadd_s8(a, vmovn_s16(tmp)); +} + +inline qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t tmp1 = tmp0; + + // Vector multiply-accumulate long + tmp0 = vmlal_s8(tmp0, vget_low_s8(b), vget_low_s8(c)); + tmp1 = vmlal_s8(tmp1, vget_high_s8(b), vget_high_s8(c)); + + // Shift right by fixed_point_position + tmp0 = vshlq_s16(tmp0, fixed_point_position_s16); + tmp1 = vshlq_s16(tmp1, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + return vcombine_s8(vadd_s8(vget_low_s8(a), vmovn_s16(tmp0)), vadd_s8(vget_high_s8(a), vmovn_s16(tmp1))); +} + +inline qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vqshlq_s16(tmp, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + return vqadd_s8(a, vqmovn_s16(tmp)); +} + +inline qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t tmp1 = tmp0; + + // Vector multiply-accumulate long + tmp0 = vmlal_s8(tmp0, vget_low_s8(b), vget_low_s8(c)); + tmp1 = vmlal_s8(tmp1, vget_high_s8(b), vget_high_s8(c)); + + // Shift right by fixed_point_position 
+ tmp0 = vqshlq_s16(tmp0, fixed_point_position_s16); + tmp1 = vqshlq_s16(tmp1, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + qint8x16_t res = vcombine_s8(vqmovn_s16(tmp0), vqmovn_s16(tmp1)); + return vqaddq_s8(a, res); +} + +inline qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vshlq_s16(tmp, fixed_point_position_s16); + + // Accumulate + return vaddq_s16(a, tmp); +} + +inline qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vqshlq_s16(tmp, fixed_point_position_s16); + + // Accumulate + return vqaddq_s16(a, tmp); +} + +inline qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(static_cast(1 << fixed_point_position)); + + float32x4x2_t res_f32 = + { + { + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f) + } + }; + + res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2); + res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2); + + const int32x4x2_t res_s32 = + { + { + vcvtq_s32_f32(res_f32.val[0]), + vcvtq_s32_f32(res_f32.val[1]), + } + }; + + const int16x8_t res_s16 = vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1])); + + return vqmovn_s16(res_s16); +} + +inline qint8x16_t 
vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(static_cast(1 << fixed_point_position)); + + float32x4x4_t res_f32 = + { + { + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f) + } + }; + + res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2); + res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2); + res_f32.val[2] = vmlaq_f32(res_f32.val[2], a.val[2], pow2); + res_f32.val[3] = vmlaq_f32(res_f32.val[3], a.val[3], pow2); + + const int32x4x4_t res_s32 = + { + { + vcvtq_s32_f32(res_f32.val[0]), + vcvtq_s32_f32(res_f32.val[1]), + vcvtq_s32_f32(res_f32.val[2]), + vcvtq_s32_f32(res_f32.val[3]), + } + }; + + const int16x8x2_t res_s16 = + { + { + vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1])), + vcombine_s16(vqmovn_s32(res_s32.val[2]), vqmovn_s32(res_s32.val[3])), + } + }; + + return vcombine_s8(vqmovn_s16(res_s16.val[0]), vqmovn_s16(res_s16.val[1])); +} + +inline float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); + + const int16x8_t res_s16 = vmovl_s8(a); + + const int32x4x2_t res_s32 = + { + { + vmovl_s16(vget_low_s16(res_s16)), + vmovl_s16(vget_high_s16(res_s16)) + } + }; + + float32x4x2_t res_f32 = + { + { + vcvtq_f32_s32(res_s32.val[0]), + vcvtq_f32_s32(res_s32.val[1]) + } + }; + + res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2); + res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2); + + return res_f32; +} + +inline float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); + + const int16x8x2_t res_s16 = + { + { + vmovl_s8(vget_low_s8(a)), + vmovl_s8(vget_high_s8(a)), + } + }; + + const int32x4x4_t res_s32 = + { + { + vmovl_s16(vget_low_s16(res_s16.val[0])), + vmovl_s16(vget_high_s16(res_s16.val[0])), + vmovl_s16(vget_low_s16(res_s16.val[1])), + 
vmovl_s16(vget_high_s16(res_s16.val[1])), + } + }; + + float32x4x4_t res_f32 = + { + { + vcvtq_f32_s32(res_s32.val[0]), + vcvtq_f32_s32(res_s32.val[1]), + vcvtq_f32_s32(res_s32.val[2]), + vcvtq_f32_s32(res_s32.val[3]) + } + }; + + res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2); + res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2); + res_f32.val[2] = vmulq_f32(res_f32.val[2], pow2); + res_f32.val[3] = vmulq_f32(res_f32.val[3], pow2); + + return res_f32; +} + +inline qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 + const qint8x8_t const_48_over_17 = vdup_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 + const qint8x8_t const_minus_32_over_17 = vdup_n_s8(-(0x3C >> (5 - fixed_point_position))); // -1.8823 + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + + // Find shift value + const qint8x8_t shift_value = vneg_s8(vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + const qint8x8_t temp = vshl_s8(a, shift_value); + + qint8x8_t x = vadd_s8(const_48_over_17, vmul_qs8(temp, const_minus_32_over_17, fixed_point_position)); + + uint8x8_t set_one = vcgt_s8(x, const_one); + x = vbsl_s8(set_one, const_one, x); + + // Use three iterations of Newton-Raphson method to get the result + x = vadd_s8(x, vmul_qs8(x, vsub_s8(const_one, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vadd_s8(x, vmul_qs8(x, vsub_s8(const_one, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vadd_s8(x, vmul_qs8(x, vsub_s8(const_one, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position)); + + return vshl_s8(x, shift_value); +} + +inline qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 + const qint8x16_t const_48_over_17 = vdupq_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 + const qint8x16_t 
const_minus_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // 1.8823 (minus sign is applied by the vsubq_qs8 below) + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + + // Find shift value + const qint8x16_t shift_value = vnegq_s8(vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + const qint8x16_t temp = vshlq_s8(a, shift_value); + + qint8x16_t x = vsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_minus_32_over_17, fixed_point_position)); + + // Set initial guess to one if x > 1 + uint8x16_t set_one = vcgtq_s8(x, const_one); + x = vbslq_s8(set_one, const_one, x); + + // Use three iterations of Newton-Raphson method to get the result + x = vaddq_s8(x, vmulq_qs8(x, vsubq_s8(const_one, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s8(x, vmulq_qs8(x, vsubq_s8(const_one, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s8(x, vmulq_qs8(x, vsubq_s8(const_one, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + + return vshlq_s8(x, shift_value); +} + +inline qint8x16_t vqrecipq_qs8(qint8x16_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 + const qint8x16_t const_48_over_17 = vdupq_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 + const qint8x16_t const_minus_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // 1.8823 (minus sign is applied by the vqsubq_qs8 below) + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + + // Find shift value + const qint8x16_t shift_value = vqnegq_s8(vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + const qint8x16_t temp = vqshlq_s8(a, shift_value); + + qint8x16_t x = vqsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_minus_32_over_17, fixed_point_position)); + + // Set initial guess to one if x > 1 + uint8x16_t set_one = vcgtq_s8(x, const_one); + x = vbslq_s8(set_one, const_one, x); + + // Use three iterations of Newton-Raphson method to get the 
result + x = vqaddq_s8(x, vqmulq_qs8(x, vqsubq_s8(const_one, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s8(x, vqmulq_qs8(x, vqsubq_s8(const_one, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s8(x, vqmulq_qs8(x, vqsubq_s8(const_one, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + + return vqshlq_s8(x, shift_value); +} + +inline qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + return vmul_qs8(a, vrecip_qs8(b, fixed_point_position), fixed_point_position); +} + +inline qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + return vmulq_qs8(a, vrecipq_qs8(b, fixed_point_position), fixed_point_position); +} + +template <bool islog> +inline qint8x8_t vtaylor_poly_qs8(int8x8_t a, int fixed_point_position) +{ + const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position)); + const qint8x8_t const_one = vdup_n_s8(1); + const qint8x8_t A = vrshl_s8(islog ? log_tab_qs8[0] : exp_tab_qs8[0], islog ? vadd_s8(shift_value, const_one) : shift_value); + const qint8x8_t B = vrshl_s8(islog ? log_tab_qs8[1] : exp_tab_qs8[1], shift_value); + const qint8x8_t C = vrshl_s8(islog ? log_tab_qs8[2] : exp_tab_qs8[2], shift_value); + const qint8x8_t D = vrshl_s8(islog ? log_tab_qs8[3] : exp_tab_qs8[3], shift_value); + const qint8x8_t x1 = vadd_s8(vmul_qs8(a, D, fixed_point_position), C); + const qint8x8_t x2 = vadd_s8(vmul_qs8(a, x1, fixed_point_position), B); + const qint8x8_t x3 = vadd_s8(vmul_qs8(a, x2, fixed_point_position), A); + const qint8x8_t res = vmul_qs8(a, x3, fixed_point_position); + return res; +} + +template <bool islog> +inline qint8x8_t vqtaylor_poly_qs8(int8x8_t a, int fixed_point_position) +{ + const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position)); + const qint8x8_t const_one = vdup_n_s8(1); + const qint8x8_t A = vqrshl_s8(islog ? log_tab_qs8[0] : exp_tab_qs8[0], islog ? 
vqadd_s8(shift_value, const_one) : shift_value); + const qint8x8_t B = vqrshl_s8(islog ? log_tab_qs8[1] : exp_tab_qs8[1], shift_value); + const qint8x8_t C = vqrshl_s8(islog ? log_tab_qs8[2] : exp_tab_qs8[2], shift_value); + const qint8x8_t D = vqrshl_s8(islog ? log_tab_qs8[3] : exp_tab_qs8[3], shift_value); + const qint8x8_t x1 = vqadd_s8(vqmul_qs8(a, D, fixed_point_position), C); + const qint8x8_t x2 = vqadd_s8(vqmul_qs8(a, x1, fixed_point_position), B); + const qint8x8_t x3 = vqadd_s8(vqmul_qs8(a, x2, fixed_point_position), A); + const qint8x8_t res = vqmul_qs8(a, x3, fixed_point_position); + return res; +} + +template +inline qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t shift_value = vdupq_n_s8(-(7 - fixed_point_position)); + const qint8x16_t const_one = vdupq_n_s8(1); + const qint8x16_t A = vrshlq_s8(islog ? log_tabq_qs8[0] : exp_tabq_qs8[0], islog ? vaddq_s8(shift_value, const_one) : shift_value); + const qint8x16_t B = vrshlq_s8(islog ? log_tabq_qs8[1] : exp_tabq_qs8[1], shift_value); + const qint8x16_t C = vrshlq_s8(islog ? log_tabq_qs8[2] : exp_tabq_qs8[2], shift_value); + const qint8x16_t D = vrshlq_s8(islog ? log_tabq_qs8[3] : exp_tabq_qs8[3], shift_value); + const qint8x16_t x1 = vaddq_s8(vmulq_qs8(a, D, fixed_point_position), C); + const qint8x16_t x2 = vaddq_s8(vmulq_qs8(a, x1, fixed_point_position), B); + const qint8x16_t x3 = vaddq_s8(vmulq_qs8(a, x2, fixed_point_position), A); + const qint8x16_t res = vmulq_qs8(a, x3, fixed_point_position); + return res; +} + +template +inline qint8x16_t vqtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t shift_value = vdupq_n_s8(-(7 - fixed_point_position)); + const qint8x16_t const_one = vdupq_n_s8(1); + const qint8x16_t A = vqrshlq_s8(islog ? log_tabq_qs8[0] : exp_tabq_qs8[0], islog ? vqaddq_s8(shift_value, const_one) : shift_value); + const qint8x16_t B = vqrshlq_s8(islog ? 
log_tabq_qs8[1] : exp_tabq_qs8[1], shift_value); + const qint8x16_t C = vqrshlq_s8(islog ? log_tabq_qs8[2] : exp_tabq_qs8[2], shift_value); + const qint8x16_t D = vqrshlq_s8(islog ? log_tabq_qs8[3] : exp_tabq_qs8[3], shift_value); + const qint8x16_t x1 = vqaddq_s8(vqmulq_qs8(a, D, fixed_point_position), C); + const qint8x16_t x2 = vqaddq_s8(vqmulq_qs8(a, x1, fixed_point_position), B); + const qint8x16_t x3 = vqaddq_s8(vqmulq_qs8(a, x2, fixed_point_position), A); + const qint8x16_t res = vqmulq_qs8(a, x3, fixed_point_position); + return res; +} + +inline qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t shift_value = vdup_n_s8(fixed_point_position - 7); + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_ln2 = vqrshl_s8(vdup_n_s8(0x58), shift_value); // ln(2) + const qint8x8_t const_inv_ln2 = vorr_s8(vqrshl_s8(vdup_n_s8(0x38), shift_value), const_one); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + const qint8x8_t m = vqmul_qs8(a, const_inv_ln2, fixed_point_position); // x / ln(2) + + // get decimal part from m + const qint8x8_t dec_m = vqshl_s8(m, vdup_n_s8(-fixed_point_position)); + + qint8x8_t alpha = vqmul_qs8(vqshl_s8(dec_m, vdup_n_s8(fixed_point_position)), const_ln2, fixed_point_position); + alpha = vqabs_qs8(vqsub_s8(a, alpha)); + + // Polynomial Approximation + qint8x8_t poly = vqtaylor_poly_qs8(alpha, fixed_point_position); + poly = vqadd_s8(poly, const_one); + + // Reconstruct + poly = vqshl_s8(poly, dec_m); + + return poly; +} + +inline qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t shift_value = vdupq_n_s8(fixed_point_position - 7); + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_ln2 = vqrshlq_s8(vdupq_n_s8(0x58), shift_value); // ln(2) + const qint8x16_t const_inv_ln2 = vorrq_s8(vqrshlq_s8(vdupq_n_s8(0x38), shift_value), const_one); // 1/ln(2) + + // Perform range reduction 
[-log(2),log(2)] + const qint8x16_t m = vqmulq_qs8(a, const_inv_ln2, fixed_point_position); // x / ln(2) + + // get decimal part from m + const qint8x16_t dec_m = vqshlq_s8(m, vdupq_n_s8(-fixed_point_position)); + + qint8x16_t alpha = vqmulq_qs8(vqshlq_s8(dec_m, vdupq_n_s8(fixed_point_position)), const_ln2, fixed_point_position); + alpha = vqabsq_qs8(vqsubq_qs8(a, alpha)); + + // Polynomial Approximation + qint8x16_t poly = vqtaylor_polyq_qs8(alpha, fixed_point_position); + poly = vqaddq_s8(poly, const_one); + + // Reconstruct + poly = vqshlq_s8(poly, dec_m); + + return poly; +} + +inline qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_seven_dec = vdup_n_s8(7); + const qint8x8_t const_ln2 = vdup_n_s8(0x58 >> (7 - fixed_point_position)); // ln(2) + + // If 0 < a < 1, calculate log(1/x) + uint8x8_t calc_reciprocal = vclt_s8(a, const_one); + qint8x8_t recip = vdup_n_s8(0); + recip = vbsl_s8(calc_reciprocal, recip, a); + + // Calculate reciprocal + recip = vrecip_qs8(recip, fixed_point_position); + a = vbsl_s8(calc_reciprocal, recip, a); + + // Get decimal part of a + qint8x8_t shift_value = vdup_n_s8(-fixed_point_position); + qint8x8_t dec_a = vshl_s8(a, shift_value); // a >> fixed_point_position + + // Get exponent of 2^n which is equal or less than dec_a + shift_value = vsub_s8(const_seven_dec, vclz_s8(dec_a)); + + // Get x to range (1, 2] + const qint8x8_t shift_value_neg = vneg_s8(shift_value); + const qint8x8_t temp = vsub_s8(vrshl_s8(a, shift_value_neg), const_one); + const qint8x8_t sum = vmul_s8(shift_value, const_one); + + // Polynomial Approximation + qint8x8_t poly = vtaylor_poly_qs8(temp, fixed_point_position); + + // Reconstruct + poly = vmul_qs8(vadd_s8(poly, sum), const_ln2, fixed_point_position); + + // Set negative value for 0 < a < 1 + poly = vbsl_s8(calc_reciprocal, vneg_s8(poly), poly); + + return poly; +} + +inline qint8x16_t 
vlogq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_seven_dec = vdupq_n_s8(7); + const qint8x16_t const_ln2 = vdupq_n_s8(0x58 >> (7 - fixed_point_position)); // ln(2) + + // If 0 < a < 1, calculate log(1/x) + uint8x16_t calc_reciprocal = vcltq_s8(a, const_one); + qint8x16_t recip = vdupq_n_s8(0); + recip = vbslq_s8(calc_reciprocal, a, recip); + + // Calculate reciprocal + recip = vrecipq_qs8(recip, fixed_point_position); + a = vbslq_s8(calc_reciprocal, recip, a); + + // Get decimal part of a + qint8x16_t shift_value = vdupq_n_s8(-fixed_point_position); + qint8x16_t dec_a = vshlq_s8(a, shift_value); // a >> fixed_point_position + + // Get exponent of 2^n which is equal or less than dec_a + shift_value = vsubq_s8(const_seven_dec, vclzq_s8(dec_a)); + + // Get x to range (1, 2] + const qint8x16_t shift_value_neg = vnegq_s8(shift_value); + const qint8x16_t temp = vsubq_s8(vrshlq_s8(a, shift_value_neg), const_one); + const qint8x16_t sum = vmulq_s8(shift_value, const_one); + + // Polynomial Approximation + qint8x16_t poly = vtaylor_polyq_qs8(temp, fixed_point_position); + + // Reconstruct + poly = vmulq_qs8(vaddq_s8(poly, sum), const_ln2, fixed_point_position); + + // Set negative value for 0 < a < 1 + poly = vbslq_s8(calc_reciprocal, vnegq_s8(poly), poly); + + return poly; +} + +inline qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
+ qint8x8_t shift_value = vneg_s8(vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x8_t temp = vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))); + uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0)); + temp = vbsl_s8(temp_ltz, vadd_s8(temp, vdup_n_s8(1)), temp); + qint8x8_t shift_value2 = vneg_s8(vshr_n_s8(temp, 1)); + + temp = vshl_s8(a, shift_value); + + // Initial guess + qint8x8_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshl_s8(x, shift_value2); +} + +inline qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
+ qint8x8_t shift_value = vneg_s8(vqsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x8_t temp = vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))); + uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0)); + temp = vbsl_s8(temp_ltz, vadd_s8(temp, vdup_n_s8(1)), temp); + qint8x8_t shift_value2 = vneg_s8(vshr_n_s8(temp, 1)); + + temp = vshl_s8(a, shift_value); + + // Initial guess + qint8x8_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshl_s8(x, shift_value2); +} + +inline qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
+ qint8x16_t shift_value = vnegq_s8(vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x16_t temp = vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))); + uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0)); + temp = vbslq_s8(temp_ltz, vaddq_s8(temp, vdupq_n_s8(1)), temp); + qint8x16_t shift_value2 = vnegq_s8(vshrq_n_s8(temp, 1)); + + temp = vshlq_s8(a, shift_value); + + // Initial guess + qint8x16_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshlq_s8(x, shift_value2); +} + +inline qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
+ qint8x16_t shift_value = vnegq_s8(vqsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x16_t temp = vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))); + uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0)); + temp = vbslq_s8(temp_ltz, vaddq_s8(temp, vdupq_n_s8(1)), temp); + qint8x16_t shift_value2 = vnegq_s8(vshrq_n_s8(temp, 1)); + + temp = vshlq_s8(a, shift_value); + + // Initial guess + qint8x16_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshlq_s8(x, shift_value2); +} + +inline qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_two = vdup_n_s8(2 << fixed_point_position); + + qint8x8_t exp2x = vqexp_qs8(vqmul_qs8(const_two, a, fixed_point_position), fixed_point_position); + qint8x8_t num = vqsub_qs8(exp2x, const_one); + qint8x8_t den = vqadd_qs8(exp2x, const_one); + qint8x8_t tanh = vqmul_qs8(num, vrecip_qs8(den, fixed_point_position), fixed_point_position); + + return tanh; +} + +inline qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_two = vdupq_n_s8(2 << fixed_point_position); + + qint8x16_t exp2x = 
vqexpq_qs8(vqmulq_qs8(const_two, a, fixed_point_position), fixed_point_position); + qint8x16_t num = vqsubq_qs8(exp2x, const_one); + qint8x16_t den = vqaddq_qs8(exp2x, const_one); + qint8x16_t tanh = vqmulq_qs8(num, vqrecipq_qs8(den, fixed_point_position), fixed_point_position); + + return tanh; +} + +inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position); +} +} diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h index 55f54dd..eaa50f1 100644 --- a/arm_compute/core/NEON/NEKernels.h +++ b/arm_compute/core/NEON/NEKernels.h @@ -30,6 +30,7 @@ #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" #include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" #include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" @@ -41,11 +42,13 @@ #include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" #include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" #include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h" #include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h" #include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h" #include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" #include "arm_compute/core/NEON/kernels/NEDilateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" #include 
"arm_compute/core/NEON/kernels/NEErodeKernel.h" #include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" #include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" @@ -62,12 +65,12 @@ #include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" #include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" -#include "arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h" #include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" #include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" #include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" #include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" #include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" @@ -88,5 +91,6 @@ #include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" #include "arm_compute/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/core/NEON/kernels/NEWarpKernel.h" +#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #endif /* __ARM_COMPUTE_NEKERNELS_H__ */ diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h index d40e0c3..bb8a330 100644 --- a/arm_compute/core/NEON/NEMath.h +++ b/arm_compute/core/NEON/NEMath.h @@ -28,131 +28,46 @@ namespace arm_compute { -/* Exponent polynomial coefficients */ -const std::array exp_tab = -{ - { - vdupq_n_f32(1.f), - vdupq_n_f32(0.0416598916054f), - vdupq_n_f32(0.500000596046f), - vdupq_n_f32(0.0014122662833f), - vdupq_n_f32(1.00000011921f), - vdupq_n_f32(0.00833693705499f), - vdupq_n_f32(0.166665703058f), - vdupq_n_f32(0.000195780929062f), - } -}; - -/* Logarithm polynomial coefficients */ -const std::array log_tab = -{ - { - 
vdupq_n_f32(-2.29561495781f), - vdupq_n_f32(-2.47071170807f), - vdupq_n_f32(-5.68692588806f), - vdupq_n_f32(-0.165253549814f), - vdupq_n_f32(5.17591238022f), - vdupq_n_f32(0.844007015228f), - vdupq_n_f32(4.58445882797f), - vdupq_n_f32(0.0141278216615f), - } -}; - /** Calculate inverse square root. * - * @param x Input value. + * @param[in] x Input value. * * @return The calculated inverse square root. */ -inline float32x4_t vinvsqrtq_f32(float32x4_t x) -{ - float32x4_t sqrt_reciprocal = vrsqrteq_f32(x); - sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - - return sqrt_reciprocal; -} +float32x4_t vinvsqrtq_f32(float32x4_t x); /** Calculate reciprocal. * - * @param x Input value. + * @param[in] x Input value. * * @return The calculated reciprocal. */ -inline float32x4_t vinvq_f32(const float32x4_t &x) -{ - float32x4_t recip = vrecpeq_f32(x); - recip = vmulq_f32(vrecpsq_f32(x, recip), recip); - recip = vmulq_f32(vrecpsq_f32(x, recip), recip); - return recip; -} +float32x4_t vinvq_f32(float32x4_t x); /** Perform a 7th degree polynomial approximation using Estrin's method. * - * @param x Input vector value in F32 format. - * @param coeffs Polynomial coefficients table. + * @param[in] x Input vector value in F32 format. + * @param[in] coeffs Polynomial coefficients table. * * @return The calculated approximation. 
*/ -inline float32x4_t vtaylor_polyq_f32(const float32x4_t &x, const std::array &coeffs) -{ - float32x4_t A = vmlaq_f32(coeffs[0], coeffs[4], x); - float32x4_t B = vmlaq_f32(coeffs[2], coeffs[6], x); - float32x4_t C = vmlaq_f32(coeffs[1], coeffs[5], x); - float32x4_t D = vmlaq_f32(coeffs[3], coeffs[7], x); - float32x4_t x2 = vmulq_f32(x, x); - float32x4_t x4 = vmulq_f32(x2, x2); - float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4); - return res; -} +float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array &coeffs); /** Calculate exponential * - * @param x Input vector value in F32 format. + * @param[in] x Input vector value in F32 format. * * @return The calculated exponent. */ -inline float32x4_t vexpq_f32(const float32x4_t &x) -{ - static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) - static const float32x4_t CONST_INV_LN2 = vdupq_n_f32(1.4426950408f); // 1/ln(2) - - // Perform range reduction [-log(2),log(2)] - int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, CONST_INV_LN2)); - float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), CONST_LN2); - - // Polynomial Approximation - float32x4_t poly = vtaylor_polyq_f32(val, exp_tab); - - // Reconstruct - poly = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(poly), vshlq_n_s32(m, 23))); - - return poly; -} +float32x4_t vexpq_f32(float32x4_t x); /** Calculate logarithm * - * @param x Input vector value in F32 format. + * @param[in] x Input vector value in F32 format. * * @return The calculated logarithm. 
*/ -inline float32x4_t vlogq_f32(const float32x4_t &x) -{ - static const int32x4_t CONST_127 = vdupq_n_s32(127); // 127 - static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) - - // Extract exponent - int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), CONST_127); - float32x4_t val = vreinterpretq_f32_s32(vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23))); - - // Polynomial Approximation - float32x4_t poly = vtaylor_polyq_f32(val, log_tab); - - // Reconstruct - poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2); - - return poly; -} +float32x4_t vlogq_f32(float32x4_t x); /** Calculate hyperbolic tangent. * @@ -160,38 +75,22 @@ inline float32x4_t vlogq_f32(const float32x4_t &x) * * @note We clamp x to [-5,5] to avoid overflowing issues. * - * @param val Input vector value in F32 format. + * @param[in] val Input vector value in F32 format. * * @return The calculated Hyperbolic Tangent. */ -inline float32x4_t vtanhq_f32(const float32x4_t &val) -{ - static const float32x4_t CONST_1 = vdupq_n_f32(1.f); // 1.f - static const float32x4_t CONST_2 = vdupq_n_f32(2.f); // 2.f - static const float32x4_t CONST_MIN_TANH = vdupq_n_f32(-5.f); // -5.f - static const float32x4_t CONST_MAX_TANH = vdupq_n_f32(5.f); // 5.f - - float32x4_t x = vminq_f32(vmaxq_f32(val, CONST_MIN_TANH), CONST_MAX_TANH); - float32x4_t exp2x = vexpq_f32(vmulq_f32(CONST_2, x)); - float32x4_t num = vsubq_f32(exp2x, CONST_1); - float32x4_t den = vaddq_f32(exp2x, CONST_1); - float32x4_t tanh = vmulq_f32(num, vinvq_f32(den)); - return tanh; -} +float32x4_t vtanhq_f32(float32x4_t val); /** Calculate n power of a number. * * pow(x,n) = e^(n*log(x)) * - * @param val Input vector value in F32 format. - * @param n Powers to raise the input to. + * @param[in] val Input vector value in F32 format. + * @param[in] n Powers to raise the input to. * * @return The calculated power. 
*/ -inline float32x4_t vpowq_f32(const float32x4_t &val, const float32x4_t &n) -{ - return vexpq_f32(vmulq_f32(n, vlogq_f32(val))); -} +float32x4_t vpowq_f32(float32x4_t val, float32x4_t n); } - +#include "arm_compute/core/NEON/NEMath.inl" #endif /* __ARM_COMPUTE_NEMATH_H__ */ diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl new file mode 100644 index 0000000..a31a4c0 --- /dev/null +++ b/arm_compute/core/NEON/NEMath.inl @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +namespace arm_compute +{ +/* Exponent polynomial coefficients */ +const std::array exp_tab = +{ + { + vdupq_n_f32(1.f), + vdupq_n_f32(0.0416598916054f), + vdupq_n_f32(0.500000596046f), + vdupq_n_f32(0.0014122662833f), + vdupq_n_f32(1.00000011921f), + vdupq_n_f32(0.00833693705499f), + vdupq_n_f32(0.166665703058f), + vdupq_n_f32(0.000195780929062f), + } +}; + +/* Logarithm polynomial coefficients */ +const std::array log_tab = +{ + { + vdupq_n_f32(-2.29561495781f), + vdupq_n_f32(-2.47071170807f), + vdupq_n_f32(-5.68692588806f), + vdupq_n_f32(-0.165253549814f), + vdupq_n_f32(5.17591238022f), + vdupq_n_f32(0.844007015228f), + vdupq_n_f32(4.58445882797f), + vdupq_n_f32(0.0141278216615f), + } +}; + +inline float32x4_t vinvsqrtq_f32(float32x4_t x) +{ + float32x4_t sqrt_reciprocal = vrsqrteq_f32(x); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + + return sqrt_reciprocal; +} + +inline float32x4_t vinvq_f32(float32x4_t x) +{ + float32x4_t recip = vrecpeq_f32(x); + recip = vmulq_f32(vrecpsq_f32(x, recip), recip); + recip = vmulq_f32(vrecpsq_f32(x, recip), recip); + return recip; +} + +inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array &coeffs) +{ + float32x4_t A = vmlaq_f32(coeffs[0], coeffs[4], x); + float32x4_t B = vmlaq_f32(coeffs[2], coeffs[6], x); + float32x4_t C = vmlaq_f32(coeffs[1], coeffs[5], x); + float32x4_t D = vmlaq_f32(coeffs[3], coeffs[7], x); + float32x4_t x2 = vmulq_f32(x, x); + float32x4_t x4 = vmulq_f32(x2, x2); + float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4); + return res; +} + +inline float32x4_t vexpq_f32(float32x4_t x) +{ + static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) + static const float32x4_t CONST_INV_LN2 = vdupq_n_f32(1.4426950408f); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + 
int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, CONST_INV_LN2)); + float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), CONST_LN2); + + // Polynomial Approximation + float32x4_t poly = vtaylor_polyq_f32(val, exp_tab); + + // Reconstruct + poly = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(poly), vshlq_n_s32(m, 23))); + + return poly; +} + +inline float32x4_t vlogq_f32(float32x4_t x) +{ + static const int32x4_t CONST_127 = vdupq_n_s32(127); // 127 + static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) + + // Extract exponent + int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), CONST_127); + float32x4_t val = vreinterpretq_f32_s32(vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23))); + + // Polynomial Approximation + float32x4_t poly = vtaylor_polyq_f32(val, log_tab); + + // Reconstruct + poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2); + + return poly; +} + +inline float32x4_t vtanhq_f32(float32x4_t val) +{ + static const float32x4_t CONST_1 = vdupq_n_f32(1.f); + static const float32x4_t CONST_2 = vdupq_n_f32(2.f); + static const float32x4_t CONST_MIN_TANH = vdupq_n_f32(-10.f); + static const float32x4_t CONST_MAX_TANH = vdupq_n_f32(10.f); + + float32x4_t x = vminq_f32(vmaxq_f32(val, CONST_MIN_TANH), CONST_MAX_TANH); + float32x4_t exp2x = vexpq_f32(vmulq_f32(CONST_2, x)); + float32x4_t num = vsubq_f32(exp2x, CONST_1); + float32x4_t den = vaddq_f32(exp2x, CONST_1); + float32x4_t tanh = vmulq_f32(num, vinvq_f32(den)); + return tanh; +} + +inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n) +{ + return vexpq_f32(vmulq_f32(n, vlogq_f32(val))); +} +} \ No newline at end of file diff --git a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h index 39f92e3..9ef93ce 100644 --- a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h +++ b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h @@ -65,8 +65,8 @@ public: 
private: /** Common signature for all the specialised absolute difference functions * - * @param[in] input1 An input tensor. Data types supported: U8, S16. - * @param[in] input2 An input tensor. Data types supported: U8, S16. + * @param[in] input1 An input tensor. Data types supported: U8/S16. + * @param[in] input2 An input tensor. Data types supported: U8/S16. * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16. * @param[in] window Region on which to execute the kernel. */ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index ba93c59..97f92d6 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ #define __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ +#include "arm_compute/core/FixedPoint.h" #include "arm_compute/core/NEON/INESimpleKernel.h" namespace arm_compute @@ -46,7 +47,7 @@ public: NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; /** Set the input and output tensor. * - * @param[in] input Source tensor. Data types supported: F32. + * @param[in] input Source tensor. Data types supported: QS8/F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] activation_info Activation layer information. */ @@ -66,8 +67,14 @@ private: * * @param[in] window Region on which to execute the kernel */ - template - void activation(const Window &window); + template + typename std::enable_if::value, void>::type activation(const Window &window); + /** Function to apply an activation function on a tensor. 
+ * + * @param[in] window Region on which to execute the kernel + */ + template + typename std::enable_if::value, void>::type activation(const Window &window); private: ActivationFunctionExecutorPtr _func; diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h index 46d2292..b36ca46 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -50,9 +50,9 @@ public: /** Initialise the kernel's input, output and border mode. * - * @param[in] input1 An input tensor. Data types supported: U8, S16, F32 - * @param[in] input2 An input tensor. Data types supported: U8, S16, F32 (only if @p input1 is F32). - * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32 (only if both inputs are F32). + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32). * @param[in] policy Overflow policy. */ void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); @@ -63,9 +63,9 @@ public: private: /** Common signature for all the specialised add functions * - * @param[in] input1 An input tensor. Data types supported: U8, S16, F32. - * @param[in] input2 An input tensor. Data types supported: U8, S16, F32 (only if @p input1 is F32). - * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32 (only if both inputs are F32). + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32. + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). + * @param[out] output The output tensor. 
Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32). * @param[in] window Region on which to execute the kernel. */ using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h index a1dcb73..0eb9c23 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -50,9 +50,9 @@ public: /** Initialise the kernel's input, output and border mode. * - * @param[in] input1 An input tensor. Data types supported: U8, S16, F32 - * @param[in] input2 An input tensor. Data types supported: U8, S16, F32 (only if @p input1 is F32). - * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32 (only if both inputs are F32). + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32). * @param[in] policy Overflow policy. */ void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h new file mode 100644 index 0000000..29fcbd2 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the batch normalization layer kernel. + */ +class NEBatchNormalizationLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/F32. + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. 
Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using BatchNormFunction = void(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, const Window &window); + BatchNormFunction *_func; + const ITensor *_input; + ITensor *_output; + const ITensor *_mean; + const ITensor *_var; + const ITensor *_gamma; + const ITensor *_beta; + float _epsilon; +}; +} +#endif /*__ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h index b808dc1..f6bc215 100644 --- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h +++ b/arm_compute/core/NEON/kernels/NECol2ImKernel.h @@ -66,7 +66,7 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. Data types supported: F32 + * @param[in] input The input tensor to convert. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32 * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. @@ -77,8 +77,22 @@ public: void run(const Window &window) override; private: - const ITensor *_input; - ITensor *_output; + /** Template function to run the col2im + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run_col2im(const Window &window); + + /** Common signature for all the specialised col2im functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window); + + Col2ImFunctionPtr _func; + const ITensor *_input; + ITensor *_output; std::pair _convolved_dims; }; } diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h new file mode 100644 index 0000000..7384cd1 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ +#define __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. 
+ */ +class NEDepthConcatenateKernel : public INEKernel +{ +public: + /** Default constructor */ + NEDepthConcatenateKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConcatenateKernel(const NEDepthConcatenateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConcatenateKernel &operator=(const NEDepthConcatenateKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDepthConcatenateKernel(NEDepthConcatenateKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDepthConcatenateKernel &operator=(NEDepthConcatenateKernel &&) = default; + /** Default destructor */ + ~NEDepthConcatenateKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: F32. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
+ * + */ + void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; + ITensor *_output; + int _top_bottom; + int _left_right; + unsigned int _depth_offset; +}; +} +#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h index e92e09b..0c5c29e 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h @@ -43,15 +43,15 @@ public: * * Valid conversions Input -> Output : * - * - U8 -> U16, S16, U32, S32 - * - U16 -> U8, U32, S32 - * - S16 -> U8, U32, S32 - * - U32 -> U8, U16, S16 - * - S32 -> U8, U16, S16 + * - QS8 -> F32 + * - U8 -> U16, S16, S32 + * - U16 -> U8, U32 + * - S16 -> U8, S32 + * - F32 -> QS8 * * - * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32. - * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32. + * @param[in] input The input tensor to convert. Data types supported: U8/QS8/U16/S16/F32. + * @param[out] output The output tensor. Data types supported: U8/QS8/U16/S16/U32/S32/F32. * @param[in] policy Conversion policy. * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. */ diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h new file mode 100644 index 0000000..f098e18 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ +#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +/** NEON kernel to accumulate the biases to each element of the input tensor + * + * @note We assume bias to be shared + */ +class NEDirectConvolutionLayerBiasAccumulateKernel : public INEKernel +{ +public: + /** Default constructor */ + NEDirectConvolutionLayerBiasAccumulateKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerBiasAccumulateKernel(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerBiasAccumulateKernel &operator=(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerBiasAccumulateKernel(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerBiasAccumulateKernel &operator=(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default; + /** Default destructor */ + ~NEDirectConvolutionLayerBiasAccumulateKernel() = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. + * Data type supported: QS8/F32 + * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input + * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. 
(Defaults to nullptr) + * Data type supported: Same as @p input + */ + void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using BiasAccumulateKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output); + +private: + BiasAccumulateKernel *_func; + ITensor *_input; + const ITensor *_bias; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h new file mode 100644 index 0000000..d726071 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON interface for Direct Convolution Layer kernel */ +class NEDirectConvolutionLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NEDirectConvolutionLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default; + /** Default destructor */ + ~NEDirectConvolutionLayerKernel() = default; + /** Set the input, weights and output tensors. + * + * @param[in] input Input tensor. Data types supported: QS8/F32. + * @param[in] weights Set of kernels to convolve the input volume. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported: Same as @p input. + * @param[out] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ */ + void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; + const ITensor *_weights; + ITensor *_output; + PadStrideInfo _conv_info; + BorderSize _border_size; + unsigned int _kernel_size; + unsigned int _num_elems_read_per_iteration; + unsigned int _num_elems_written_per_iteration; +}; +} +#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h index 0829cc7..3ec6611 100644 --- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h +++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h @@ -53,7 +53,7 @@ public: * * @note This kernel fills the borders within the XY-planes. * - * @param[in,out] tensor Tensor to process. Data types supported: U8, S16, S32, F32. + * @param[in,out] tensor Tensor to process. Data types supported: U8/S8/QS8/QS16/S16/S32/F32. * @param[in] border_size Size of the border to fill in elements. * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. diff --git a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h index 1c8ef32..61e6e46 100644 --- a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h +++ b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h @@ -53,7 +53,7 @@ public: * * @note This kernel fills the borders within the XY-planes. * - * @param[in,out] input Tensor to process. Data types supported: U8, S16, S32, F32. + * @param[in,out] input Tensor to process. Data types supported: U8/QS8/S16/S32/F32. * @param[in] border_size Size of the border to fill in elements. 
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. * diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h index 7790cf1..b9884ff 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h @@ -56,7 +56,7 @@ public: NEGEMMInterleave4x4Kernel(); /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input. */ void configure(const ITensor *input, ITensor *output); @@ -67,7 +67,7 @@ public: private: /** Common signature for all the transpose functions * - * @param[in] input An input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 * @param[out] output The output tensor. Data type supported: same as @p input * @param[in] window Region on which to execute the kernel. */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h index 7d6806d..c0ecafc 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h @@ -47,7 +47,7 @@ public: ~NEGEMMMatrixAccumulateBiasesKernel() = default; /** Set the accumulate buffer and the biases of the kernel. * - * @param[in, out] accum The accumulate tensor to convert. Data type supported: F32 + * @param[in, out] accum The accumulate tensor to convert. Data type supported: QS8/F32 * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. 
Data type supported: Same as @p input */ void configure(ITensor *accum, const ITensor *biases); diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h index d1eccec..1ab52fa 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h @@ -55,17 +55,27 @@ public: * * @note The input and output tensor must have the same dimensions * - * @param[in] input Input tensor (Matrix C). Data types supported: F32, F16. + * @param[in] input Input tensor (Matrix C). Data types supported: QS8/F16/F32 * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. * @param[in] beta Weight of matrix C */ - void configure(const ITensor *input, ITensor *output, const float beta); + void configure(const ITensor *input, ITensor *output, float beta); // Inherited methods overridden: void run(const Window &window) override; private: - float _beta; + /** Common signature for all the matrix addition functions + * + * @param[in] input An input tensor. Data types supported: QS8/F16/F32 + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. 
+ * @param[in] beta Weight of matrix C + */ + using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta); + /** Matrix addition function to use for the particular tensor types passed to configure() */ + MatrixAdditionFunction *_func; + float _beta; }; } #endif /* __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h index f45fb0f..a684945 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h @@ -54,7 +54,7 @@ public: * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel * These two kernels change the layout of the original matrices to be more cache-friendly. * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F32, F16. + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. 
diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h index 416b55f..5d8a369 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h @@ -30,9 +30,9 @@ namespace arm_compute { class ITensor; -/** NEON kernel which transposes the elements of a matrix in chunks of 1x4 if the input data type is F32 or in chunks of 1x8 if the input data type is F16. +/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) * - * Following an example of how the transposition1xW works when the input data type is F32 + * Following an example of how the transposition1xW works when the input data is F32 * * @f[ * \left( \begin{array}{cccc} @@ -62,8 +62,7 @@ class ITensor; * \end{array} \right) * @f] * - * @note If the input data type is F32, the output matrix will have the following shape: [ height * 4, width / 4 ] - * @note If the input data type is F16, the output matrix will have the following shape: [ height * 8, width / 8 ] + * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) * */ class NEGEMMTranspose1xWKernel : public INESimpleKernel @@ -71,7 +70,7 @@ class NEGEMMTranspose1xWKernel : public INESimpleKernel public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: F32, 16. + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor. Data type supported: same as @p input. 
*/ void configure(const ITensor *input, ITensor *output); diff --git a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h index 24fa032..dd85778 100644 --- a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h +++ b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h @@ -53,7 +53,7 @@ public: * * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell * @param[in] hog_info HOG's metadata */ void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h index bda213b..e56d1e5 100644 --- a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h +++ b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h @@ -79,6 +79,7 @@ private: size_t _block_stride_height; size_t _detection_window_width; size_t _detection_window_height; + size_t _max_num_detection_windows; std::mutex _mutex; }; } diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h index 3bcd686..0abd73e 100644 --- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h +++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h @@ -57,7 +57,7 @@ public: public: /** Setup the kernel parameters * - * @param[in] input1 Source image (gradient X). 
Data types supported: S16, S32 + * @param[in] input1 Source image (gradient X). Data types supported: S16/S32 + * @param[in] input2 Source image (gradient Y). Data types supported: same as @p input1 + * @param[out] output Destination image (harris score). Data types supported: F32 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h index ba5077a..ebaafb4 100644 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -72,7 +72,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/F32 * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] convolved_dims The convolved output dimensions. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -84,15 +84,17 @@ public: void run(const Window &window) override; private: - /** Run the im2col used for the convolution layer case + /** Template function to run the im2col optimised for the fully connected layer case * * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). */ + template void run_reduced(const Window &window); - /** Run the im2col optimised for the fully connected layer case + /** Template function to run the im2col used for the convolution layer case * * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). 
*/ + template void run_generic(const Window &window); /** Common signature for all the specialised im2col functions * diff --git a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h new file mode 100644 index 0000000..d4bff66 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply each row of first tensor with low 2 dimensions of second tensor. 
*/ +class NELocallyConnectedMatrixMultiplyKernel : public INEKernel +{ +public: + /** Default constructor */ + NELocallyConnectedMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output + * + * @param[in] input0 First input tensor. Data types supported: F32 + * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h index bd84810..0daae59 100644 --- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -54,7 +54,7 @@ public: /** Initialise the kernel's sources, destinations and border mode. * - * @param[in] input Source tensor. 
Data types supported: U8, F32 + * @param[in] input Source tensor. Data types supported: U8/F32 * @param[out] output Destination tensor. Data types supported: same as @p input * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ @@ -86,7 +86,7 @@ class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kern public: /** Initialise the kernel's sources, destinations and border mode. * - * @param[in] input Source tensor. Data types supported: U8, F32. + * @param[in] input Source tensor. Data types supported: U8/F32. * @param[out] output Destination tensor. Data types supported: same as @p input * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h index 18d198c..d4e36d5 100644 --- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h @@ -47,11 +47,10 @@ public: NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; /** Default destructor */ ~NENormalizationLayerKernel() = default; - /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F32. + * and an optional 4th dimension for batch of inputs. Data types supported: QS8/F32. * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], * Data type supported: same as @p input * @param[out] output Destination tensor. Output will have the same number of dimensions as input. 
Data type supported: same as @p input @@ -64,17 +63,34 @@ public: BorderSize border_size() const override; private: - /** Function to perform normalization depending on the given templates dimension. + /** Function to perform normalization depending on the given template + * dimension. The second template parameter specifies whether the + * normalization has to be 1D or 2D. * - * @note Only normalization across X and Z is currently supported and tested. + * @note Only supported normalizations are: + * - 1D over X or Z + * - 2D over X and Y * - * @param window Region on which to execute the kernel. + * @param[in] window Region on which to execute the kernel. */ - template + template void normalize(const Window &window); + + /** Function to perform normalization for fixed-point values depending on + * the given template dimension. The second template parameter specifies + * whether the normalization has to be 1D or 2D. + * + * @note Only supported normalizations are: + * - 1D over X or Z + * - 2D over X and Y + * + * @param[in] window Region on which to execute the kernel. + */ + template + void normalize_fixed_point(const Window &window); /** Common signature for all the specialised normalization functions * - * @param window Region on which to execute the kernel. + * @param[in] window Region on which to execute the kernel. 
*/ using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window); diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h index 0891d0c..7e402cd 100644 --- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -47,15 +47,14 @@ public: NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default; /** Default destructor */ ~NEPixelWiseMultiplicationKernel() = default; - /** Initialise the kernel's input, output and border mode. * * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. * - * @param[in] input1 An input tensor. Data types supported: U8, S16, F32. - * @param[in] input2 An input tensor. Data types supported: U8, S16, F32. - * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32. + * @param[in] input1 An input tensor. Data types supported: U8/QS8/S16/F32. + * @param[in] input2 An input tensor. Data types supported: U8/QS8/S16/F32. + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8) /S16/F32. * @param[in] scale Scale to apply after multiplication. * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. @@ -71,19 +70,29 @@ private: * * @param[in] input1_ptr Pointer to the first input tensor. * @param[in] input2_ptr Pointer to the second input tensor. - * @param[out] output_ptr Pointer to the output tensor + * @param[out] output_ptr Pointer to the output tensor. 
*/ using MulFunctionInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale); + /** Common signature for all the specialised multiplication functions with fixed-point values + * + * @param[in] input1_ptr Pointer to the first input tensor. + * @param[in] input2_ptr Pointer to the second input tensor. + * @param[in] scale Scaling factor. + * @param[in] fixed_point_position Fixed-point position that expresses the number of bits for the fractional part of the number. + * @param[out] output_ptr Pointer to the output tensor. + */ + using MulFunctionQInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale, int fixed_point_position); /** Common signature for all the specialised multiplication functions with float scaling factor * * @param[in] input1_ptr Pointer to the first input tensor. * @param[in] input2_ptr Pointer to the second input tensor. - * @param[out] output_ptr Pointer to the output tensor + * @param[out] output_ptr Pointer to the output tensor. */ using MulFunctionFloat = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale); MulFunctionFloat *_func_float; MulFunctionInt *_func_int; + MulFunctionQInt *_func_q_int; private: const ITensor *_input1; diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index 728b2ff..62a0878 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -46,10 +46,9 @@ public: NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default; /** Default destructor */ ~NEPoolingLayerKernel() = default; - /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. + * @param[in] input Source tensor. Data types supported: QS8/F32. * @param[out] output Destination tensor. 
Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ @@ -66,14 +65,28 @@ private: * @param[in] window Output region on which to execute the kernel. */ template - void pooling2(const Window &window_input, const Window &window); + void pooling2_f32(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling for 8bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void pooling2_q8(const Window &window_input, const Window &window); /** Function to perform 3x3 pooling. * * @param[in] window_input Input region on which to execute the kernel. * @param[in] window Output region on which to execute the kernel. */ template - void pooling3(const Window &window_input, const Window &window); + void pooling3_f32(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling for 8bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void pooling3_q8(const Window &window_input, const Window &window); /** Common signature for all the specialised Pooling functions * * @param[in] window_input Input region on which to execute the kernel. @@ -86,6 +99,7 @@ private: const ITensor *_input; ITensor *_output; PoolingLayerInfo _pool_info; + int _num_elems_processed_per_iteration; BorderSize _border_size; }; } diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h index 0f11e7e..03e2652 100644 --- a/arm_compute/core/NEON/kernels/NEScaleKernel.h +++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h @@ -52,11 +52,11 @@ public: * * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor * - * @param[in] input Source tensor. 
Data types supported: U8 or S16. + * @param[in] input Source tensor. Data types supported: U8/S16. * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[out] output Destination tensor. Data types supported: U8 or S16. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[out] output Destination tensor. Data types supported: U8/S16. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] policy Interpolation type to use * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h index 83d55d3..ab626ad 100644 --- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h @@ -39,7 +39,7 @@ public: NELogits1DMaxKernel(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. + * @param[in] input Source tensor. Data types supported: QS8/F32. * @param[out] output Destination tensor. 
Data types supported: same as @p input */ void configure(const ITensor *input, ITensor *output); @@ -49,7 +49,11 @@ public: BorderSize border_size() const override; private: - BorderSize _border_size; + using Logits1DMaxFunction = void(const ITensor *in, ITensor *out, const Window &window); + +private: + Logits1DMaxFunction *_func; + BorderSize _border_size; }; /** Interface for shifting the logits values around the max value and exponentiating the result */ @@ -68,10 +72,9 @@ public: NELogits1DShiftExpSumKernel &operator=(NELogits1DShiftExpSumKernel &&) = default; /** Default destructor */ ~NELogits1DShiftExpSumKernel() = default; - /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. + * @param[in] input Source tensor. Data types supported: QS8/F32. * @param[in] max Max values tensor. Data types supported: same as @p input. * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input. @@ -80,14 +83,16 @@ public: // Inherited methods overridden: void run(const Window &window) override; - BorderSize border_size() const override; private: - const ITensor *_input; - const ITensor *_max; - ITensor *_output; - ITensor *_sum; - BorderSize _border_size; + using Logits1DShiftExpSumFunction = void(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window); + +private: + Logits1DShiftExpSumFunction *_func; + const ITensor *_input; + const ITensor *_max; + ITensor *_output; + ITensor *_sum; }; /** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ @@ -106,10 +111,9 @@ public: NELogits1DNormKernel &operator=(NELogits1DNormKernel &&) = default; /** Default destructor */ ~NELogits1DNormKernel() = default; - /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. 
+ * @param[in] input Source tensor. Data types supported: QS8/F32. * @param[in] sum Sum tensor. The number of dimensions should be dim(input)-1. Data types supported: same as @p input. * @param[out] output Destination tensor. Data types supported: same as @p input. */ @@ -119,9 +123,13 @@ public: void run(const Window &window) override; private: - const ITensor *_input; - const ITensor *_sum; - ITensor *_output; + using Logits1DNormFunction = void(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window); + +private: + Logits1DNormFunction *_func; + const ITensor *_input; + const ITensor *_sum; + ITensor *_output; }; } #endif /*__ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NETableLookupKernel.h b/arm_compute/core/NEON/kernels/NETableLookupKernel.h index 499b87f..b3963e5 100644 --- a/arm_compute/core/NEON/kernels/NETableLookupKernel.h +++ b/arm_compute/core/NEON/kernels/NETableLookupKernel.h @@ -47,7 +47,7 @@ public: NETableLookupKernel &operator=(NETableLookupKernel &&) = default; /** Initialise the kernel's input, lut and output. * - * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] input An input tensor. Data types supported: U8/S16. * @param[in] lut The input LUT. * @param[out] output The output tensor. Data types supported: same as @p input */ diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h index 4d82383..ac9449f 100644 --- a/arm_compute/core/NEON/kernels/NETransposeKernel.h +++ b/arm_compute/core/NEON/kernels/NETransposeKernel.h @@ -53,7 +53,7 @@ public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor. 
Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); @@ -64,7 +64,7 @@ public: private: /** Common signature for all the transpose functions * - * @param[in] input An input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 * @param[out] output The output tensor. Data type supported: same as @p input * @param[in] window Region on which to execute the kernel. */ diff --git a/arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h similarity index 65% rename from arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h rename to arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h index 6057b2f..cad2d00 100644 --- a/arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h +++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_NECONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ -#define __ARM_COMPUTE_NECONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ +#ifndef __ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__ +#define __ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__ #include "arm_compute/core/NEON/INEKernel.h" @@ -30,7 +30,7 @@ namespace arm_compute { class ITensor; -/** NEON kernel to perform reshaping on the weights used by convolution layer. +/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer * * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication. 
@@ -53,27 +53,28 @@ class ITensor; * \end{array} \right) * @f] */ -class NEConvolutionLayerWeightsReshapeKernel : public INEKernel +class NEWeightsReshapeKernel : public INEKernel { public: - /** Default constructor */ - NEConvolutionLayerWeightsReshapeKernel(); + /** Constructor.*/ + NEWeightsReshapeKernel(); /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionLayerWeightsReshapeKernel(const NEConvolutionLayerWeightsReshapeKernel &) = delete; + NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionLayerWeightsReshapeKernel &operator=(const NEConvolutionLayerWeightsReshapeKernel &) = delete; + NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete; /** Allow instances of this class to be moved */ - NEConvolutionLayerWeightsReshapeKernel(NEConvolutionLayerWeightsReshapeKernel &&) = default; + NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default; /** Allow instances of this class to be moved */ - NEConvolutionLayerWeightsReshapeKernel &operator=(NEConvolutionLayerWeightsReshapeKernel &&) = default; + NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default; /** Default destructor */ - ~NEConvolutionLayerWeightsReshapeKernel() = default; - + ~NEWeightsReshapeKernel() = default; /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data types supported: F32 - * @param[in] bias The shared bias tensor to append. Biases are 1D tensor with dimensions [OFM]. Data types supported: Same as @p input - * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input + * @param[in] input The input tensor to convert. 
Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QS8/F32 + * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input + * @param[out] output The output tensor. Data types supported: Same as @p input */ void configure(const ITensor *input, const ITensor *bias, ITensor *output); @@ -81,11 +82,13 @@ public: void run(const Window &window) override; private: - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - bool _has_bias; + using WeightsReshapeKernel = void(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window); + + WeightsReshapeKernel *_func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; }; } -#endif /*__ARM_COMPUTE_NECONVOLUTIONLAYERWEIGHTSRESHAPEKERNEL_H__ */ +#endif /*__ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index 8a45444..b4912ce 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -101,6 +101,7 @@ public: uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ float f32; /**< Single channel float 32 */ uint8_t u8; /**< Single channel U8 */ + int8_t s8; /**< Single channel S8 */ uint16_t u16; /**< Single channel U16 */ int16_t s16; /**< Single channel S16 */ uint32_t u32; /**< Single channel U32 */ @@ -114,6 +115,14 @@ public: { v = value.u8; } + /** Interpret the pixel value as a S8 + * + * @param[out] v Returned value + */ + void get(int8_t &v) const + { + v = value.s8; + } /** Interpret the pixel value as a U16 * * @param[out] v Returned value diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h index 4706c81..33a88a2 100644 --- a/arm_compute/core/Steps.h +++ b/arm_compute/core/Steps.h @@ -40,7 
+40,6 @@ namespace arm_compute class Steps : public Dimensions { public: -#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ /** Constructor to initialize the steps. * * @param[in] steps Values to initialize the steps. @@ -52,7 +51,6 @@ public: // Initialize empty dimensions to 1 std::fill(_id.begin() + _num_dimensions, _id.end(), 1); } -#endif /** Allow instances of this class to be copy constructed */ constexpr Steps(const Steps &) = default; /** Allow instances of this class to be copied */ diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h index efdeb11..329fafb 100644 --- a/arm_compute/core/Strides.h +++ b/arm_compute/core/Strides.h @@ -38,7 +38,6 @@ namespace arm_compute class Strides : public Dimensions { public: -#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ /** Constructor to initialize the strides. * * @param[in] strides Values to initialize the strides. @@ -48,7 +47,6 @@ public: : Dimensions{ strides... } { } -#endif /** Allow instances of this class to be copy constructed */ constexpr Strides(const Strides &) = default; /** Allow instances of this class to be copied */ diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h new file mode 100644 index 0000000..e2532fd --- /dev/null +++ b/arm_compute/core/SubTensorInfo.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_SUBTENSORINFO_H__ +#define __ARM_COMPUTE_SUBTENSORINFO_H__ + +#include "arm_compute/core/ITensorInfo.h" + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Strides.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Validate.h" + +#include + +namespace arm_compute +{ +/** Store the sub tensor's metadata */ +class SubTensorInfo final : public ITensorInfo +{ +public: + /** Default constructor */ + SubTensorInfo(); + /** Default constructor + * + * @param[in] parent Metadata of parent tensor. + * @param[in] tensor_shape Tensor shape. Shape must fit inside parent's shape. + * X and Y dimensions must match the parent's ones. + * @param[in] coords Coordinates of starting element inside parent tensor. 
+ */ + SubTensorInfo(ITensorInfo *parent, const TensorShape &tensor_shape, const Coordinates &coords); + /** Default destructor */ + ~SubTensorInfo() = default; + /** Allow instances of this class to be copy constructed */ + SubTensorInfo(const SubTensorInfo &) = default; + /** Allow instances of this class to be copied */ + SubTensorInfo &operator=(const SubTensorInfo &) = default; + /** Allow instances of this class to be move constructed */ + SubTensorInfo(SubTensorInfo &&) = default; + /** Allow instances of this class to be moved */ + SubTensorInfo &operator=(SubTensorInfo &&) = default; + + // Inherited methods overridden: + void set_data_type(DataType data_type) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_data_type(data_type); + }; + void set_num_channels(int num_channels) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_num_channels(num_channels); + }; + void set_format(Format format) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_format(format); + }; + void set_fixed_point_position(int fixed_point_position) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_fixed_point_position(fixed_point_position); + }; + void set_tensor_shape(TensorShape shape) override; + bool auto_padding() override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->auto_padding(); + }; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override + { + return _tensor_shape[index]; + } + const Strides &strides_in_bytes() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->strides_in_bytes(); + } + size_t offset_first_element_in_bytes() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->offset_element_in_bytes(_coords); + } + size_t offset_element_in_bytes(const Coordinates &pos) const override; + int fixed_point_position() const override + { + ARM_COMPUTE_ERROR_ON(_parent 
== nullptr); + return _parent->fixed_point_position(); + } + size_t element_size() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->element_size(); + } + size_t num_dimensions() const override + { + return _tensor_shape.num_dimensions(); + } + size_t num_channels() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->num_channels(); + } + const TensorShape &tensor_shape() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _tensor_shape; + } + DataType data_type() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->data_type(); + } + Format format() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->format(); + } + size_t total_size() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->total_size(); + } + PaddingSize padding() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->padding(); + } + bool has_padding() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->has_padding(); + } + bool is_resizable() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->is_resizable(); + } + void set_is_resizable(bool is_resizable) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_is_resizable(is_resizable); + } + ValidRegion valid_region() const override + { + return _valid_region; + } + void set_valid_region(ValidRegion valid_region) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region); + _valid_region = std::move(valid_region); + } + +private: + ITensorInfo *_parent; + TensorShape _tensor_shape; + Coordinates _coords; + ValidRegion _valid_region; +}; +} +#endif /*__ARM_COMPUTE_SUBTENSORINFO_H__ */ diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h index b1f7db0..35b9ccb 100644 --- 
a/arm_compute/core/TensorInfo.h +++ b/arm_compute/core/TensorInfo.h @@ -24,6 +24,8 @@ #ifndef __ARM_COMPUTE_TENSORINFO_H__ #define __ARM_COMPUTE_TENSORINFO_H__ +#include "arm_compute/core/ITensorInfo.h" + #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" @@ -37,13 +39,15 @@ namespace arm_compute class HOGInfo; /** Store the tensor's metadata */ -class TensorInfo +class TensorInfo final : public ITensorInfo { public: /** Default constructor */ TensorInfo(); /** Default destructor */ - virtual ~TensorInfo() = default; + ~TensorInfo() = default; + /** Allow instances of this class to be copy constructed */ + TensorInfo(const ITensorInfo &info); /** Allow instances of this class to be copy constructed */ TensorInfo(const TensorInfo &) = default; /** Allow instances of this class to be copied */ @@ -52,6 +56,15 @@ public: TensorInfo(TensorInfo &&) = default; /** Allow instances of this class to be moved */ TensorInfo &operator=(TensorInfo &&) = default; + + /** Construct a tensor info with a format. + * + * Can be used for automatic derivation of the shape by the function. + * + * @param[in] format Format of the tensor. + */ + TensorInfo(Format format); + /** 2D tensor constructor * * @param[in] width Width of the 2D tensor @@ -65,15 +78,25 @@ public: * @param[in] format Single plane format of the tensor. */ TensorInfo(const TensorShape &tensor_shape, Format format); + + /** Construct a tensor info with a data type and number of channels. + * + * Can be used for automatic derivation of the shape by the function. + * + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] fixed_point_position (Optional) It specifies the fixed point position when the tensor data type is QS8, QS16 or QS32. 
+ */ + TensorInfo(size_t num_channels, DataType data_type, size_t fixed_point_position = 0); + /** Constructor * - * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements. - * @param[in] num_channels It indicates the number of channels for each tensor element - * @param[in] data_type Data type to use for each tensor element - * @param[in] fixed_point_pos (Optional) It specifies the fixed point position when the tensor data type is INT8, INT16 or INT32. (Default = 0) - If 0, calculations are performed in integer math + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements. + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. */ - TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, size_t fixed_point_pos = 0); + TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); /** Constructor * * @param[in] hog_info HOG's metadata used to allocate normalized HOG space @@ -81,6 +104,15 @@ public: * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on */ TensorInfo(const HOGInfo &hog_info, unsigned int width, unsigned int height); + + /** Initialize the tensor info with just a format. + * + * Can be used for automatic derivation of the shape by the function. + * + * @param[in] format Single plane format of the tensor. + */ + void init(Format format); + /** Initialize the metadata structure with the given parameters * * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. 
@@ -96,15 +128,25 @@ public: * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). */ void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes); + + /** Initialize the tensor info with just a format. + * + * Can be used for automatic derivation of the shape by the function. + * + * @param[in] num_channels Desired number of channels for each tensor element. + * @param[in] data_type Data type to use for each tensor element. + * @param[in] fixed_point_position (Optional) Fixed point position when the tensor data type is QS8, QS16 or QS32. + */ + void init(size_t num_channels, DataType data_type, size_t fixed_point_position = 0); + /** Initialize the metadata structure with the given parameters * - * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. - * @param[in] num_channels Desired number of channels for each tensor element. - * @param[in] data_type Data type to use for each tensor element. - * @param[in] fixed_point_pos (Optional) Fixed point position when the tensor data type is INT8, INT16 or INT32 (default = 0). - * If 0, calculations are performed in integer arithmetic. + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. + * @param[in] num_channels Desired number of channels for each tensor element. + * @param[in] data_type Data type to use for each tensor element. + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. 
*/ - void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, size_t fixed_point_pos = 0); + void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); /** Initialize the metadata structure with the given parameters * * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. @@ -113,11 +155,10 @@ public: * @param[in] strides_in_bytes Stride in bytes for accessing each dimension of the tensor. * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). - * @param[in] fixed_point_pos (Optional) Fixed point position when the tensor data type is INT8, INT16 or INT32 (default = 0). - * If 0, calculations are performed in integer arithmetic. + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. */ void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, - size_t total_size_in_bytes, size_t fixed_point_pos = 0); + size_t total_size_in_bytes, int fixed_point_position = 0); /** Initialize the metadata structure for the given HOG's metadata * * @param[in] hog_info HOG's metadata used to allocate normalized HOG space @@ -140,15 +181,14 @@ public: * * @note The padding used by this method is really conservative so that the tensor can be used for most functions. 
* - * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements - * @param[in] num_channels It indicates the number of channels for each tensor element - * @param[in] data_type Data type to use for each tensor element - * @param[in] fixed_point_pos (Optional) It specifies the fixed point position when the tensor data type is INT8, INT16 or INT32. (Default = 0) - * If 0, calculations are performed in integer math + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. * * @return Total allocation size including padding in bytes. */ - size_t init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, size_t fixed_point_pos = 0); + size_t init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); /** Initialize the metadata structure for the given HOG's metadata * * @note init_auto_padding will be used for the tensor initialization. @@ -158,167 +198,81 @@ public: * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on */ size_t init_auto_padding(const HOGInfo &hog_info, unsigned int width, unsigned int height); - /** Update the offset to the first element and the strides to automatically computed values. - * - * @note The padding used by this method is really conservative so that the tensor can be used for most functions. - * - * @return True if the strides or the offset to the first element have changed. - */ - bool auto_padding(); - /** Update the offset to the first element, the strides and the total size. 
- * - * @note This function can only increase the offset, strides and total size. - * - * @param[in] padding Padding around the XY plane in number of elements. - * - * @return True if the strides, offset and total size have changed. - */ - bool extend_padding(const PaddingSize &padding); - /** Set the format of an already initialized tensor. - * - * @note The passed format must be compatible with the existing number of channels and data type of the tensor. - * - * @param[in] format Single-plane format of the tensor. - */ - void set_format(Format format); - /** Return the size of the requested dimension - * - * @param[in] index Index of the dimension - * - * @return Dimension of the requested dimension - */ - size_t dimension(size_t index) const + + // Inherited methods overridden: + void set_data_type(DataType data_type) override; + void set_num_channels(int num_channels) override; + void set_format(Format format) override; + void set_tensor_shape(TensorShape shape) override; + void set_fixed_point_position(int fixed_point_position) override; + bool auto_padding() override; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override { return _tensor_shape[index]; } - /** The strides in bytes for accessing each dimension of the tensor - * - * @return Strides in bytes for each tensor dimension - */ - const Strides &strides_in_bytes() const + const Strides &strides_in_bytes() const override { return _strides_in_bytes; } - /** The offset from the beginning of the memory allocation to the first element of the tensor. - * This can be used to access efficiently elements in a 2D tensor - * - * @return The offset in bytes to access the first element of the tensor. 
- */ - size_t offset_first_element_in_bytes() const + size_t offset_first_element_in_bytes() const override { return _offset_first_element_in_bytes; } - /** The offset in bytes from the beginning of the memory allocation to access the element at position (x, y, z ...) - * - * @param[in] pos Vector with the coordinates of the element to access. - * The size of this vector must be equal to the number of dimensions of the tensor - * - * @return Offset in bytes from the beginning of the memory allocation to access the element (x, y, z, ...) - */ - size_t offset_element_in_bytes(const Coordinates &pos) const; - /** Fixed point position used when the tensor data type is S8, S16 or S32. - * - * @return The fixed point position - */ - size_t fixed_point_pos() const + size_t offset_element_in_bytes(const Coordinates &pos) const override; + int fixed_point_position() const override { - return _fixed_point_pos; + return _fixed_point_position; } - /** Element size in bytes calculated as data_size() * num_channels - * - * @return The size of one element in bytes - */ - size_t element_size() const + size_t element_size() const override { return data_size_from_type(_data_type) * _num_channels; } - /** The number of dimensions of the tensor (rank) - * - * @return The number of dimensions of the tensor (rank) - */ - size_t num_dimensions() const + size_t num_dimensions() const override { return _tensor_shape.num_dimensions(); } - /** The number of channels for each tensor element - * - * @return The number of channels for each tensor element - */ - size_t num_channels() const + size_t num_channels() const override { return _num_channels; } - /** Size for each dimension of the tensor - * - * @return A vector with the size for each dimension of the tensor - */ - const TensorShape &tensor_shape() const + const TensorShape &tensor_shape() const override { return _tensor_shape; } - /** Data type used for each element of the tensor - * - * @return Tensor data type - */ - DataType 
data_type() const + DataType data_type() const override { return _data_type; } - /** Colour format of the image - * - * @return Colour format of the image - */ - Format format() const + Format format() const override { return _format; } - /** Returns the total size of the tensor in bytes. - * - * @return Total size of the tensor in bytes. - */ - size_t total_size() const + size_t total_size() const override { return _total_size; } - /** Padding of tensor. - * - * @return Padding. - */ - PaddingSize padding() const + PaddingSize padding() const override { return _padding; } - /** Checks if the tensor has been allocated with padding or not. - * - * @return True if padding is allocated in the tensor, otherwise false. - */ - bool has_padding() const + bool has_padding() const override { return !_padding.empty(); } - /** Flag indicating whether the size of the tensor can be changed. - * - * @return True if the tensor size can be changed. - */ - bool is_resizable() const + bool is_resizable() const override { return _is_resizable; } - /** Set the flag whether the tensor size can be changed. */ - void set_is_resizable(bool is_resizable) + void set_is_resizable(bool is_resizable) override { _is_resizable = is_resizable; } - /** Valid region of the tensor. All elements in the valid region have defined values, i.e. are not undefined. - * - * @return The valid region. - */ - ValidRegion valid_region() const + ValidRegion valid_region() const override { return _valid_region; } - /** Set the valid region of the tensor. 
*/ - void set_valid_region(ValidRegion valid_region) + void set_valid_region(ValidRegion valid_region) override { _valid_region = std::move(valid_region); } @@ -331,7 +285,7 @@ private: std::tuple calculate_padding_requirements(const PaddingSize &padding); size_t _total_size; - size_t _fixed_point_pos; + int _fixed_point_position; size_t _offset_first_element_in_bytes; Strides _strides_in_bytes; size_t _num_channels; diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h index 3ac6298..f8b3181 100644 --- a/arm_compute/core/TensorShape.h +++ b/arm_compute/core/TensorShape.h @@ -38,7 +38,6 @@ namespace arm_compute class TensorShape : public Dimensions { public: -#ifndef DOXYGEN_SKIP_THIS /* Doxygen gets confused by the templates and can't match the implementation to the declaration */ /** Constructor to initialize the tensor shape. * * @param[in] dims Values to initialize the dimensions. @@ -47,10 +46,15 @@ public: TensorShape(Ts... dims) : Dimensions{ dims... } { - // Initialize empty dimensions to 1 - std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + // Initialize unspecified dimensions to 1 + if(_num_dimensions > 0) + { + std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + } + + // Correct number dimensions to ignore trailing dimensions of size 1 + apply_dimension_correction(); } -#endif /** Allow instances of this class to be copy constructed */ TensorShape(const TensorShape &) = default; /** Allow instances of this class to be copied */ @@ -61,15 +65,47 @@ public: TensorShape &operator=(TensorShape &&) = default; /** Default destructor */ ~TensorShape() = default; + + /** Accessor to set the value of one of the dimensions. + * + * @param[in] dimension Dimension for which the value is set. + * @param[in] value Value to be set for the dimension. 
+ */ + void set(size_t dimension, size_t value) + { + ARM_COMPUTE_ERROR_ON(value < 1); + + // Make sure all empty dimensions are filled with 1 + std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + + // Set the specified dimension and increase the number of dimensions if + // necessary + Dimensions::set(dimension, value); + + // Correct number dimensions to ignore trailing dimensions of size 1 + apply_dimension_correction(); + } + + /** Collapse the first n dimensions. + * + * @param[in] first Dimensions into which the following @p n are collapsed. + * @param[in] n Number of dimensions to collapse into @p first. + */ + void collapse(size_t n, size_t first = 0) + { + Dimensions::collapse(n, first); + + // Make sure all empty dimensions are filled with 1 + std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + } + /** Collapses all dimensions to a single linear total size. * * @return The total tensor size in terms of elements. */ size_t total_size() const { - const size_t size = std::accumulate(_id.begin(), _id.end(), 1, std::multiplies()); - ARM_COMPUTE_ERROR_ON(0 == size); - return size; + return std::accumulate(_id.begin(), _id.end(), 1, std::multiplies()); } /** Collapses given dimension and above. * @@ -81,9 +117,24 @@ public: */ size_t total_size_upper(size_t dimension) const { - const size_t size = std::accumulate(_id.begin() + dimension, _id.end(), 1, std::multiplies()); - ARM_COMPUTE_ERROR_ON(0 == size); - return size; + return std::accumulate(_id.begin() + dimension, _id.end(), 1, std::multiplies()); + } + +private: + /** Remove trailing dimensions of size 1 from the reported number of dimensions. 
*/ + void apply_dimension_correction() + { + for(int i = static_cast(_num_dimensions) - 1; i >= 0; --i) + { + if(_id[i] == 1) + { + --_num_dimensions; + } + else + { + break; + } + } } }; } diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 6188d58..725567b 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -62,8 +62,10 @@ enum class DataType UNKNOWN, U8, S8, + QS8, U16, S16, + QS16, U32, S32, U64, @@ -182,6 +184,14 @@ struct BorderSize return size; } + void limit(const BorderSize &limit) + { + top = std::min(top, limit.top); + right = std::min(right, limit.right); + bottom = std::min(bottom, limit.bottom); + left = std::min(left, limit.left); + } + unsigned int top; unsigned int right; unsigned int bottom; @@ -223,7 +233,8 @@ enum class ThresholdType enum class RoundingPolicy { TO_ZERO, /**< Truncates the least significand values that are lost in operations. */ - TO_NEAREST_EVEN /**< Rounds to nearest even output value */ + TO_NEAREST_UP, /**< Rounds to nearest value; half rounds up */ + TO_NEAREST_EVEN /**< Rounds to nearest value; half rounds to nearest even */ }; /** Termination criteria */ @@ -326,17 +337,17 @@ enum class NonLinearFilterFunction : unsigned /** The normalization type used for the normalization layer */ enum class NormType { - IN_MAP, /* Normalization applied within the same map */ - CROSS_MAP /* Normalization applied cross maps */ + IN_MAP_1D, /**< Normalization applied within the same map in 1D region */ + IN_MAP_2D, /**< Normalization applied within the same map in 2D region */ + CROSS_MAP /**< Normalization applied cross maps */ }; /** Normalization type for Histogram of Oriented Gradients (HOG) */ enum class HOGNormType { - L2_NORM, /**< L2-norm */ - L2HYS_NORM, /**< L2-norm followed by clipping */ - L1_NORM, /**< L1 norm */ - L1SQRT_NORM /**< L1 norm with SQRT */ + L2_NORM = 1, /**< L2-norm */ + L2HYS_NORM = 2, /**< L2-norm followed by clipping */ + L1_NORM = 3 /**< L1 norm */ }; /** Detection 
window used for the object detection. The detection window keeps the following information: @@ -497,7 +508,7 @@ class NormalizationLayerInfo public: /** Default Constructor * - * @param[in] type The normalization type. Can be @ref NormType::IN_MAP or NORM_TYPE::CROSS_MAP + * @param[in] type The normalization type. Can be @ref NormType::IN_MAP_1D, @ref NormType::IN_MAP_2D or @ref NormType::CROSS_MAP * @param[in] norm_size The normalization size is the number of elements to normalize across. Defaults to 5. * @param[in] alpha Alpha parameter used by normalization equation. Defaults to 0.0001. * @param[in] beta Beta parameter used by normalization equation. Defaults to 0.5. @@ -527,12 +538,17 @@ public: { return _kappa; } - /** Return the scaling factor of the normalization function. If kappa is not 1 then [Krichevksy 2012] normalization scaling is specified. + /** Return the scaling factor of the normalization function. If kappa is not + * 1 then [Krizhevsky 2012] normalization scaling is specified. Scaling + * factor takes into account the total number of elements used for the + * normalization, so in case of 2 dimensions this is _norm_size^2. + * + * @return The normalization scaling factor. */ float scale_coeff() const { - return (_kappa == 1.f) ? (_alpha / _norm_size) : _alpha; + const uint32_t size = (_type == NormType::IN_MAP_2D) ? _norm_size * _norm_size : _norm_size; + return (_kappa == 1.f) ? (_alpha / size) : _alpha; } private: @@ -543,6 +559,38 @@ private: float _kappa; }; +/** Convolution Layer Weights Information class */ +class WeightsInfo +{ +public: + WeightsInfo() + : _are_reshaped(false), _kernel_size(0) + { + } + /** Constructor + * + * @param[in] are_reshaped True if the weights have been reshaped + * @param[in] kernel_size The size of the kernel.
+ */ + WeightsInfo(bool are_reshaped, unsigned int kernel_size) + : _are_reshaped(are_reshaped), _kernel_size(kernel_size) + { + } + + bool are_reshaped() const + { + return _are_reshaped; + }; + unsigned int kernel_size() const + { + return _kernel_size; + } + +private: + const bool _are_reshaped; + const unsigned int _kernel_size; +}; + /** IO formatting information class*/ struct IOFormatInfo { diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 3ebf3ff..9d3ff0a 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -33,6 +33,7 @@ #include #include #include +#include #include namespace arm_compute @@ -101,10 +102,12 @@ inline size_t data_size_from_type(DataType data_type) { case DataType::U8: case DataType::S8: + case DataType::QS8: return 1; case DataType::U16: case DataType::S16: case DataType::F16: + case DataType::QS16: return 2; case DataType::F32: case DataType::U32: @@ -170,10 +173,13 @@ inline size_t element_size_from_data_type(DataType dt) { switch(dt) { + case DataType::S8: case DataType::U8: + case DataType::QS8: return 1; case DataType::U16: case DataType::S16: + case DataType::QS16: case DataType::F16: return 2; case DataType::U32: @@ -536,14 +542,14 @@ inline DataType data_type_for_convolution_matrix(const int16_t *conv, size_t siz /** Returns expected width and height of output scaled tensor depending on dimensions rounding mode. * - * @param width Width of input tensor (Number of columns) - * @param height Height of input tensor (Number of rows) - * @param kernel_size Kernel size. - * @param stride_x Stride of the operation in the x dimension. - * @param stride_y Stride of the operation in the y dimension. - * @param pad_x Padding size in the x dimension. - * @param pad_y Padding size in the y dimension. - * @param round_type Dimensions rounding mode. 
+ * @param[in] width Width of input tensor (Number of columns) + * @param[in] height Height of input tensor (Number of rows) + * @param[in] kernel_size Kernel size. + * @param[in] stride_x Stride of the operation in the x dimension. + * @param[in] stride_y Stride of the operation in the y dimension. + * @param[in] pad_x Padding size in the x dimension. + * @param[in] pad_y Padding size in the y dimension. + * @param[in] round_type Dimensions rounding mode. * * @return A pair with the new width in the first position and the new height in the second. */ @@ -610,14 +616,27 @@ const std::string &string_from_interpolation_policy(InterpolationPolicy policy); * @return The string describing the border mode. */ const std::string &string_from_border_mode(BorderMode border_mode); +/** Translates a given normalization type to a string. + * + * @param[in] type @ref NormType to be translated to string. + * + * @return The string describing the normalization type. + */ +const std::string &string_from_norm_type(NormType type); /** Lower a given string. * - * @param val Given string to lower. + * @param[in] val Given string to lower. * * @return The lowered string */ -std::string lower_string(std::string val); +std::string lower_string(const std::string &val); +/** Check if a given data type is of floating point type + * + * @param[in] dt Input data type. + * + * @return True if data type is of floating point type, else false. + */ inline bool is_data_type_float(DataType dt) { switch(dt) @@ -630,6 +649,24 @@ inline bool is_data_type_float(DataType dt) } } +/** Check if a given data type is of fixed point type + * + * @param[in] dt Input data type. + * + * @return True if data type is of fixed point type, else false. + */ +inline bool is_data_type_fixed_point(DataType dt) +{ + switch(dt) + { + case DataType::QS8: + case DataType::QS16: + return true; + default: + return false; + } +} + /** Print consecutive elements to an output stream. 
* * @param[out] s Output stream to print the elements to. @@ -641,6 +678,8 @@ inline bool is_data_type_float(DataType dt) template void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ") { + using print_type = typename std::conditional::value, T, int>::type; + for(unsigned int i = 0; i < n; ++i) { // Set stream width as it is not a "sticky" stream manipulator @@ -648,28 +687,29 @@ void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int { s.width(stream_width); } - s << std::right << ptr[i] << element_delim; + s << std::right << static_cast(ptr[i]) << element_delim; } } /** Identify the maximum width of n consecutive elements. * - * @param[in] s The output stream which will be used to print the elements. Used to extract the stream format. - * - * @param ptr Pointer to the elements. - * @param n Number of elements. + * @param[in] s The output stream which will be used to print the elements. Used to extract the stream format. + * @param[in] ptr Pointer to the elements. + * @param[in] n Number of elements. * * @return The maximum width of the elements. */ template int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, unsigned int n) { + using print_type = typename std::conditional::value, T, int>::type; + int max_width = -1; for(unsigned int i = 0; i < n; ++i) { std::stringstream ss; ss.copyfmt(s); - ss << ptr[i]; + ss << static_cast(ptr[i]); max_width = std::max(max_width, ss.str().size()); } return max_width; diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h index 5f1c541..48eba70 100644 --- a/arm_compute/core/Validate.h +++ b/arm_compute/core/Validate.h @@ -37,6 +37,82 @@ namespace arm_compute { +namespace detail +{ +/* Check whether two dimension objects differ. + * + * @param[in] dim1 First object to be compared. + * @param[in] dim2 Second object to be compared. 
+ * @param[in] upper_dim The dimension from which to check. + * + * @return Return true if the two objects are different. + */ +template +inline bool have_different_dimensions(const Dimensions &dim1, const Dimensions &dim2, unsigned int upper_dim) +{ + for(unsigned int i = upper_dim; i < arm_compute::Dimensions::num_max_dimensions; ++i) + { + if(dim1[i] != dim2[i]) + { + return true; + } + } + + return false; +} + +/** Functor to compare two @ref Dimensions objects and throw an error on mismatch. + * + * @param[in] dim Object to compare against. + * @param[in] function Function in which the error occured. + * @param[in] file File in which the error occured. + * @param[in] line Line in which the error occured. + */ +template +class compare_dimension +{ +public: + compare_dimension(const Dimensions &dim, const char *function, const char *file, int line) + : _dim{ dim }, _function{ function }, _file{ file }, _line{ line } + { + } + + /** Compare the given object against the stored one. + * + * @param[in] dim To be compared object. + */ + void operator()(const Dimensions &dim) + { + ARM_COMPUTE_ERROR_ON_LOC_MSG(have_different_dimensions(_dim, dim, 0), _function, _file, _line, + "Objects have different dimensions"); + } + +private: + const Dimensions &_dim; + const char *const _function; + const char *const _file; + const int _line; +}; +} // namespace detail +/** Throw an error if one of the pointers is a nullptr. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] pointers Pointers to check against nullptr. + */ +template +void error_on_nullptr(const char *function, const char *file, const int line, Ts &&... 
pointers) +{ + auto is_nullptr = [&](const void *ptr) + { + ARM_COMPUTE_ERROR_ON_LOC(ptr == nullptr, function, file, line); + }; + + for_each(is_nullptr, std::forward(pointers)...); +} +#define ARM_COMPUTE_ERROR_ON_NULLPTR(...) ::arm_compute::error_on_nullptr(__func__, __FILE__, __LINE__, __VA_ARGS__) + /** Throw an error if the passed window is invalid. * * The subwindow is invalid if: @@ -99,27 +175,28 @@ void error_on_window_dimensions_gte(const char *function, const char *file, cons const Window &win, unsigned int max_dim); #define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) ::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md) -/* Check whether two tensors have different shapes. +/** Throw an error if the passed dimension objects differ. * - * @param[in] tensor_1 First tensor to be compared - * @param[in] tensor_2 Second tensor to be compared - * - * @return Return true if the two tensors have different shapes + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] dim1 The first object to be compared. + * @param[in] dim2 The second object to be compared. + * @param[in] dims (Optional) Further allowed objects. */ -inline bool have_different_shapes(const ITensor *tensor_1, const ITensor *tensor_2) +template +void error_on_mismatching_dimensions(const char *function, const char *file, int line, + const Dimensions &dim1, const Dimensions &dim2, Ts &&... dims) { - for(size_t i = 0; i < arm_compute::Coordinates::num_max_dimensions; ++i) - { - if(tensor_1->info()->dimension(i) != tensor_2->info()->dimension(i)) - { - return true; - } - } + ARM_COMPUTE_UNUSED(function); + ARM_COMPUTE_UNUSED(file); + ARM_COMPUTE_UNUSED(line); - return false; + for_each(detail::compare_dimension(dim1, function, file, line), dim2, std::forward(dims)...); } +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(...) 
::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__) -/** Throw an error if the passed two tensors have different shapes +/** Throw an error if the passed two tensors have different shapes from the given dimension * * @param[in] function Function in which the error occurred. * @param[in] file Name of the file where the error occurred. @@ -132,18 +209,36 @@ template void error_on_mismatching_shapes(const char *function, const char *file, const int line, const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) { + error_on_mismatching_shapes(function, file, line, 0U, tensor_1, tensor_2, std::forward(tensors)...); +} + +/** Throw an error if the passed two tensors have different shapes from the given dimension + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] upper_dim The dimension from which to check. + * @param[in] tensor_1 The first tensor to be compared. + * @param[in] tensor_2 The second tensor to be compared. + * @param[in] tensors (Optional) Further allowed tensors. + */ +template +void error_on_mismatching_shapes(const char *function, const char *file, const int line, + unsigned int upper_dim, const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +{ ARM_COMPUTE_UNUSED(function); ARM_COMPUTE_UNUSED(file); ARM_COMPUTE_UNUSED(line); - ARM_COMPUTE_UNUSED(tensor_1); - ARM_COMPUTE_UNUSED(tensor_2); - const std::array tensors_array{ { std::forward(tensors)... } }; + const std::array < const ITensor *, 2 + sizeof...(Ts) > tensors_array{ { tensor_1, tensor_2, std::forward(tensors)... 
} }; ARM_COMPUTE_UNUSED(tensors_array); - ARM_COMPUTE_ERROR_ON_LOC_MSG(have_different_shapes(tensor_1, tensor_2) || std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor) + ARM_COMPUTE_ERROR_ON_LOC(tensors_array.cbegin() == nullptr, function, file, line); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_array.cbegin()), tensors_array.cend(), [&](const ITensor * tensor) { - return have_different_shapes(tensor_1, tensor); + ARM_COMPUTE_ERROR_ON_LOC(tensor == nullptr, function, file, line); + return detail::have_different_dimensions((*tensors_array.cbegin())->info()->tensor_shape(), tensor->info()->tensor_shape(), upper_dim); }), function, file, line, "Tensors have different shapes"); } @@ -183,6 +278,55 @@ void error_on_mismatching_data_types(const char *function, const char *file, con #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__) +/** Throw an error if the passed tensors have different fixed point data types or different fixed point positions + * + * @note: If the first tensor doesn't have fixed point data type, the function returns without throwing an error + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_1 The first tensor to be compared. + * @param[in] tensor_2 The second tensor to be compared. + * @param[in] tensors (Optional) Further allowed tensors. + */ +template +void error_on_mismatching_fixed_point(const char *function, const char *file, const int line, + const ITensor *tensor_1, const ITensor *tensor_2, Ts... 
tensors) +{ + ARM_COMPUTE_UNUSED(function); + ARM_COMPUTE_UNUSED(file); + ARM_COMPUTE_UNUSED(line); + ARM_COMPUTE_UNUSED(tensor_1); + ARM_COMPUTE_UNUSED(tensor_2); + + DataType &&first_data_type = tensor_1->info()->data_type(); + const int first_fixed_point_position = tensor_1->info()->fixed_point_position(); + ARM_COMPUTE_UNUSED(first_data_type); + ARM_COMPUTE_UNUSED(first_fixed_point_position); + + if((first_data_type != DataType::QS8) && (first_data_type != DataType::QS16)) + { + return; + } + + const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_array{ { tensor_2, std::forward(tensors)... } }; + ARM_COMPUTE_UNUSED(tensors_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor) + { + return tensor->info()->data_type() != first_data_type; + }), + function, file, line, "Tensors have different fixed point data types"); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor) + { + return tensor->info()->fixed_point_position() != first_fixed_point_position; + }), + function, file, line, "Tensors have different fixed point positions"); +} + +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(...) ::arm_compute::error_on_mismatching_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__) + /** Throw an error if the format of the passed tensor/multi-image does not match any of the formats provided. * * @param[in] function Function in which the error occurred. 
@@ -229,7 +373,7 @@ void error_on_data_type_not_in(const char *function, const char *file, const int { ARM_COMPUTE_ERROR_ON_LOC(tensor == nullptr, function, file, line); - DataType &&tensor_dt = tensor->info()->data_type(); + const DataType &tensor_dt = tensor->info()->data_type(); //NOLINT ARM_COMPUTE_UNUSED(tensor_dt); ARM_COMPUTE_ERROR_ON_LOC(tensor_dt == DataType::UNKNOWN, function, file, line); @@ -343,5 +487,77 @@ void error_on_invalid_multi_hog(const char *function, const char *file, const in void error_on_unconfigured_kernel(const char *function, const char *file, const int line, const IKernel *kernel); #define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k) ::arm_compute::error_on_unconfigured_kernel(__func__, __FILE__, __LINE__, k) + +/** Throw an error if if the coordinates and shape of the subtensor are within the parent tensor. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] parent_shape Parent tensor shape + * @param[in] coords Coordinates inside the parent tensor where the first element of the subtensor is + * @param[in] shape Shape of the subtensor + */ +void error_on_invalid_subtensor(const char *function, const char *file, const int line, + const TensorShape &parent_shape, const Coordinates &coords, const TensorShape &shape); +#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(p, c, s) ::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, p, c, s) + +/** Throw an error if the valid region of a subtensor is not inside the valid region of the parent tensor. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] parent_valid_region Parent valid region. + * @param[in] valid_region Valid region of subtensor. 
+ */ +void error_on_invalid_subtensor_valid_region(const char *function, const char *file, const int line, + const ValidRegion &parent_valid_region, const ValidRegion &valid_region); +#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv) + +/** Throw an error if the input fixed-point positions are different. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_1 The first tensor to be compared. + * @param[in] tensor_2 The second tensor to be compared. + * @param[in] tensors (Optional) Further allowed tensors. + */ +template +void error_on_mismatching_fixed_point_position(const char *function, const char *file, const int line, + const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +{ + const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_array{ { tensor_2, std::forward(tensors)... } }; + ARM_COMPUTE_UNUSED(tensors_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor) + { + return tensor->info()->fixed_point_position() != tensor_1->info()->fixed_point_position(); + }), + function, file, line, "Tensors have different fixed-point positions"); +} +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(...) ::arm_compute::error_on_mismatching_fixed_point_position(__func__, __FILE__, __LINE__, __VA_ARGS__) + +/** Throw an error if the fixed-point value is not representable in the specified Q format. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] value The floating point value to be checked. 
+ * @param[in] tensor Input tensor that has information on data type and fixed-point position. + */ +template +void error_on_value_not_representable_in_fixed_point(const char *function, const char *file, int line, + float value, const ITensor *tensor) +{ + const int fixed_point_position = tensor->info()->fixed_point_position(); + const DataType dt = tensor->info()->data_type(); + const unsigned int q_max_range = 0xFFFFFFFFu >> (((sizeof(unsigned int) - element_size_from_data_type(dt)) * 8) + 1); + const float max_range = q_max_range / (static_cast(1 << fixed_point_position)); + ARM_COMPUTE_UNUSED(max_range); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(value > max_range, function, file, line, + "Value %f is not representable in %s with fixed-point position %d", value, string_from_data_type(dt).c_str(), fixed_point_position); +} +#define ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(...) ::arm_compute::error_on_value_not_representable_in_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__) } #endif /* __ARM_COMPUTE_VALIDATE_H__*/ diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h index 727a489..6e7ef22 100644 --- a/arm_compute/core/Window.h +++ b/arm_compute/core/Window.h @@ -30,7 +30,7 @@ #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Utils.h" namespace arm_compute @@ -160,7 +160,7 @@ public: * @param[in] info Tensor information to copy the dimensions from. * @param[in] first_dimension Only copy dimensions which are greater or equal to this value. 
*/ - void use_tensor_dimensions(const TensorInfo *info, size_t first_dimension = Window::DimX); + void use_tensor_dimensions(const ITensorInfo *info, size_t first_dimension = Window::DimX); /** Shift the values of a given dimension by the given shift_value * @@ -169,6 +169,14 @@ public: */ void shift(size_t dimension, int shift_value); + /** Adjust the start or end of a given dimension by the given value + * + * @param[in] dimension The dimension to adjust + * @param[in] adjust_value The adjusted value. + * @param[in] is_at_start The flag to indicate whether adjust the start or end of the dimension. + */ + void adjust(size_t dimension, int adjust_value, bool is_at_start); + /** Scale the values of a given dimension by the given scale_value * * @note The end of the window is rounded up to be a multiple of step after the scaling. @@ -273,6 +281,18 @@ public: { return slide_window_slice<3>(slice); } + /** Slide the passed 4D window slice. + * + * If slice contains the last slice then it will remain unchanged and false will be returned. + * + * @param[in,out] slice Current slice, to be updated to the next slice. + * + * @return true if slice contains a new slice, false if slice already contained the last slice + */ + bool slide_window_slice_4D(Window &slice) const + { + return slide_window_slice<4>(slice); + } /** Sets the ID of the thread that the window is associated with. * * @param id ID of the thread that the window is associated with. 
diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl index 71bcaa3..75428a1 100644 --- a/arm_compute/core/Window.inl +++ b/arm_compute/core/Window.inl @@ -50,6 +50,21 @@ inline void Window::shift(const size_t dimension, const int shift_value) d = Window::Dimension(d.start() + shift_value, d.end() + shift_value, d.step()); } +inline void Window::adjust(size_t dimension, int adjust_value, bool is_at_start) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + Window::Dimension &d = _dims[dimension]; + + if(is_at_start) + { + d = Window::Dimension(d.start() + adjust_value, d.end(), d.step()); + } + else + { + d = Window::Dimension(d.start(), d.end() + adjust_value, d.step()); + } +} + inline void Window::scale(const size_t dimension, float scale_value) { ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); @@ -157,7 +172,7 @@ inline Window Window::first_slice_window() const return slice; } -inline void Window::use_tensor_dimensions(const TensorInfo *info, const size_t first_dimension) +inline void Window::use_tensor_dimensions(const ITensorInfo *info, const size_t first_dimension) { for(unsigned int n = first_dimension; n < info->num_dimensions(); ++n) { diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h index d16354f..82929ba 100644 --- a/arm_compute/runtime/CL/CLFunctions.h +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -30,6 +30,7 @@ #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" #include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" +#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h" #include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" #include "arm_compute/runtime/CL/functions/CLBitwiseNot.h" #include "arm_compute/runtime/CL/functions/CLBitwiseOr.h" @@ -41,6 +42,7 @@ #include "arm_compute/runtime/CL/functions/CLColorConvert.h" #include 
"arm_compute/runtime/CL/functions/CLConvolution.h" #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h" #include "arm_compute/runtime/CL/functions/CLDepthConvert.h" #include "arm_compute/runtime/CL/functions/CLDerivative.h" #include "arm_compute/runtime/CL/functions/CLDilate.h" @@ -55,11 +57,16 @@ #include "arm_compute/runtime/CL/functions/CLGaussian3x3.h" #include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" #include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" +#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h" +#include "arm_compute/runtime/CL/functions/CLHOGDetector.h" +#include "arm_compute/runtime/CL/functions/CLHOGGradient.h" +#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h" #include "arm_compute/runtime/CL/functions/CLHarrisCorners.h" #include "arm_compute/runtime/CL/functions/CLHistogram.h" #include "arm_compute/runtime/CL/functions/CLIntegralImage.h" #include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h" #include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h" +#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h" #include "arm_compute/runtime/CL/functions/CLMagnitude.h" #include "arm_compute/runtime/CL/functions/CLMeanStdDev.h" #include "arm_compute/runtime/CL/functions/CLMedian3x3.h" diff --git a/arm_compute/runtime/CL/CLHOG.h b/arm_compute/runtime/CL/CLHOG.h new file mode 100644 index 0000000..9b4a303 --- /dev/null +++ b/arm_compute/runtime/CL/CLHOG.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHOG_H__ +#define __ARM_COMPUTE_CLHOG_H__ + +#include "arm_compute/core/CL/ICLHOG.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/HOGInfo.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** OpenCL implementation of HOG data-object */ +class CLHOG : public ICLHOG +{ +public: + /** Default constructor */ + CLHOG(); + /** Allocate the HOG descriptor using the given HOG's metadata + * + * @param[in] input HOG's metadata used to allocate the HOG descriptor + */ + void init(const HOGInfo &input); + + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. 
+ */ + void map(bool blocking = true); + using ICLHOG::map; + + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLHOG::unmap; + + // Inherited method overridden: + void free() override; + const HOGInfo *info() const override; + const cl::Buffer &cl_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + HOGInfo _info; + cl::Buffer _buffer; +}; +} +#endif /* __ARM_COMPUTE_CLHOG_H__ */ diff --git a/arm_compute/runtime/CL/CLMultiHOG.h b/arm_compute/runtime/CL/CLMultiHOG.h new file mode 100644 index 0000000..17bb4e0 --- /dev/null +++ b/arm_compute/runtime/CL/CLMultiHOG.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMULTIHOG_H__ +#define __ARM_COMPUTE_CLMULTIHOG_H__ + +#include "arm_compute/core/CL/ICLMultiHOG.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLHOG.h" + +#include + +namespace arm_compute +{ +/** Basic implementation of the CL multi HOG data-objects */ +class CLMultiHOG : public ICLMultiHOG +{ +public: + /** Constructor + * + * @param[in] num_models Number of HOG data objects to contain + * + */ + CLMultiHOG(size_t num_models); + + // Inherited methods overridden: + size_t num_models() const override; + ICLHOG *cl_model(size_t index) override; + const ICLHOG *cl_model(size_t index) const override; + +private: + size_t _num_models; + std::unique_ptr _model; +}; +} +#endif /*__ARM_COMPUTE_CLMULTIHOG_H__ */ diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h index 71baa55..8e80259 100644 --- a/arm_compute/runtime/CL/CLScheduler.h +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -24,8 +24,12 @@ #ifndef __ARM_COMPUTE_CLSCHEDULER_H__ #define __ARM_COMPUTE_CLSCHEDULER_H__ +#include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" namespace arm_compute { @@ -50,7 +54,7 @@ public: void default_init() { CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault()); - init(cl::Context::getDefault(), cl::CommandQueue::getDefault()); + init(cl::Context::getDefault(), cl::CommandQueue::getDefault(), cl::Device::getDefault()); } /** Schedule the execution of the passed kernel if possible. 
* @@ -63,11 +67,14 @@ public: * * @param[in] context A CL context. * @param[in] queue A CL command queue. + * @param[in] device A CL device. */ - void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault()) + void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault(), + cl::Device device = cl::Device::getDefault()) { _context = std::move(context); _queue = std::move(queue); + _target = get_target_from_device(device); } /** Accessor for the associated CL context. @@ -97,6 +104,15 @@ public: return _queue; } + /** Get the target GPU. + * + * @return The target GPU. + */ + GPUTarget target() const + { + return _target; + } + /** Accessor to set the CL command queue to be used by the scheduler. * * @param[in] queue A CL command queue. @@ -106,6 +122,15 @@ public: _queue = std::move(queue); } + /** Accessor to set target GPU to be used by the scheduler. + * + * @param[in] target The target GPU. + */ + void set_target(GPUTarget target) + { + _target = target; + } + /** Blocks until all commands in the associated command queue have finished. */ void sync() { @@ -127,6 +152,7 @@ public: private: cl::Context _context; cl::CommandQueue _queue; + GPUTarget _target; }; } #endif /* __ARM_COMPUTE_CLSCHEDULER_H__ */ diff --git a/arm_compute/runtime/CL/CLSubTensor.h b/arm_compute/runtime/CL/CLSubTensor.h new file mode 100644 index 0000000..4bab164 --- /dev/null +++ b/arm_compute/runtime/CL/CLSubTensor.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSUBTENSOR_H__ +#define __ARM_COMPUTE_CLSUBTENSOR_H__ + +#include "arm_compute/core/SubTensorInfo.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include + +namespace arm_compute +{ +class ITensorInfo; + +/** Basic implementation of the OpenCL sub-tensor interface */ +class CLSubTensor : public ICLTensor +{ +public: + /** Constructor + * + * @param[in] parent Parent tensor + * @param[in] tensor_shape Shape of the subtensor + * @param[in] coords Coordinates of the first subtensor element inside the parent tensor. 
+ */ + CLSubTensor(ICLTensor *parent, const TensorShape &tensor_shape, const Coordinates &coords); + /** Destructor: free the tensor's memory */ + ~CLSubTensor() = default; + /** Restrict instances of this class to be copy constructed */ + CLSubTensor(const CLSubTensor &) = delete; + /** Restrict instances of this class to be copied */ + CLSubTensor &operator=(const CLSubTensor &) = delete; + /** Allow instances of this class to be move constructed */ + CLSubTensor(CLSubTensor &&) = default; + /** Allow instances of this class to be moved */ + CLSubTensor &operator=(CLSubTensor &&) = default; + + /** Enqueue a map operation of the allocated buffer. + * + * @note Mapping a subtensor will lead to the mapping of the whole parent tensor for now. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLTensor::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note Unmapping a subtensor will lead to the unmapping of the whole parent tensor for now. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. 
+ */ + void unmap(); + using ICLTensor::unmap; + + /** Return the parent tensor of the subtensor + * + * @return Parent tensor + */ + ICLTensor *parent(); + + // Inherited methods overridden: + ITensorInfo *info() const override; + ITensorInfo *info() override; + const cl::Buffer &cl_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + ICLTensor *_parent; + mutable SubTensorInfo _info; +}; +} +#endif /*__ARM_COMPUTE_CLSUBTENSOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h new file mode 100644 index 0000000..d766d1c --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLNormalizationLayerKernel and simulate a batch normalization layer. + * + * Batch normalization is calculated by: + * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f] + * + */ +class CLBatchNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + CLBatchNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F32. + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. 
Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run() override; + +private: + CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */ +}; +} +#endif /* __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index 2a9b487..6a40396 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -27,12 +27,12 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" -#include "arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" @@ -42,6 +42,34 @@ namespace arm_compute { class ICLTensor; +/** Function to reshape and transpose the weights. This function calls the following kernels: + * -# @ref CLWeightsReshapeKernel + * -# @ref CLGEMMTranspose1xWKernel + */ +class CLConvolutionLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + CLConvolutionLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: F32. + * @param[in] biases Biases tensor. 
Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. + * @param[out] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise. + * Data types supported: Same as @p weights. + */ + void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW); + // Inherited methods overridden: + void run() override; + +private: + CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel; + CLGEMMTranspose1xWKernel _weights_transposed_kernel; + CLTensor _weights_reshaped; + bool _transpose1xW; +}; + /** Basic function to compute the convolution layer. This function calls the following OpenCL kernels: * * -# @ref CLConvolutionLayerWeightsReshapeKernel (executed only once for each configuration) @@ -58,35 +86,36 @@ public: CLConvolutionLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F16, F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input. - * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F16, F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. 
*/ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); // Inherited methods overridden: void run() override; private: - CLIm2ColKernel _input_im2col_kernel; - CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel; - CLGEMMInterleave4x4Kernel _input_interleave_kernel; - CLGEMMTranspose1xWKernel _weights_transposed_kernel; - CLGEMMMatrixMultiplyKernel _mm_kernel; - CLCol2ImKernel _output_col2im_kernel; - CLTensor _input_im2col_reshaped; - CLTensor _input_interleaved_reshaped; - CLTensor _weights_reshaped; - CLTensor _weights_transposed; - CLTensor _gemm_output; - bool _is_first_run; - bool _has_bias; - bool _is_fc; + CLConvolutionLayerReshapeWeights _reshape_weights; + CLIm2ColKernel _input_im2col_kernel; + CLGEMMInterleave4x4Kernel _input_interleave_kernel; + CLGEMMMatrixMultiplyKernel _mm_kernel; + CLCol2ImKernel _output_col2im_kernel; + CLTensor _input_im2col_reshaped; + CLTensor _input_interleaved_reshaped; + CLTensor _weights_reshaped; + CLTensor _weights_transposed; + CLTensor _gemm_output; + bool _has_bias; + bool _is_fully_connected_convolution; + bool _are_weights_reshaped; }; } #endif /* __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h new file mode 100644 index 0000000..3199936 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ +#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/IFunction.h" + +#include +#include + +namespace arm_compute +{ +class ICLTensor; +class CLDepthConcatenateKernel; +class CLFillBorderKernel; + +/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: + * + * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) + * -# @ref CLDepthConcatenateKernel + * + */ +class CLDepthConcatenate : public IFunction +{ +public: + /** Default constructor */ + CLDepthConcatenate(); + /** Initialise the kernel's inputs vector and output. 
+ * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F32. + * @param[out] output Output tensor. Data types supported: F32. + */ + void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + std::vector<ICLTensor *> _inputs_vector; + std::unique_ptr<CLDepthConcatenateKernel[]> _concat_kernels_vector; + std::unique_ptr<CLFillBorderKernel[]> _border_handlers_vector; + unsigned int _num_inputs; +}; +} +#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index 09e4fc9..826f445 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -36,13 +36,44 @@ namespace arm_compute { +/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls the following kernels: + * + * -# @ref CLTransposeKernel (if @p transpose_weights is set to true) + * -# @ref CLGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class CLFullyConnectedLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + CLFullyConnectedLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights. 
+ * @param[in] is_batched_fc_layer True if it is a batched fully connected layer + */ + void configure(const ICLTensor *input, ICLTensor *output, bool transpose_weights, bool is_batched_fc_layer); + + // Inherited methods overridden: + void run() override; + +private: + CLTransposeKernel _transpose_kernel; + CLGEMMTranspose1xWKernel _transpose1xW_kernel; + CLTensor _transpose_output; + bool _transpose_weights; + bool _is_batched_fc_layer; +}; + /** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels: * * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer) - * -# @ref CLTransposeKernel (if @p transpose_weights is set to true) (called once) - * -# @ref NEGEMMTranspose1xWKernel (called once if we have a multi-batch input) - * -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input) - * -# @ref NEGEMMMatrixMultiplyKernel + * -# @ref CLFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false) (called once) + * -# @ref CLGEMMInterleave4x4Kernel (called if we have a multi-batch input) + * -# @ref CLGEMMMatrixMultiplyKernel * -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr) * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. @@ -54,13 +85,14 @@ public: CLFullyConnectedLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: F16, F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input - * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[in] weights Weights tensor. 
The weights must be 2 dimensional. Data type supported: Same as @p input + * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. + * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights = true); + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false); //Inherited methods override void run() override; @@ -71,21 +103,18 @@ private: void configure_conv_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); void configure_conv_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); - CLIm2ColKernel _im2col_kernel; - CLTransposeKernel _transpose_kernel; - CLGEMMTranspose1xWKernel _transpose1xW_kernel; - CLGEMMInterleave4x4Kernel _interleave4x4_kernel; - CLGEMMMatrixMultiplyKernel _mm_kernel; - CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; - CLTensor _im2col_output; - CLTensor _interleave4x4_output; - CLTensor _transpose_output; - CLTensor _transpose1xW_output; - bool _is_first_run; - bool _transpose_weights; - bool _fc_after_conv; - bool _batched_fc_layer; - bool _accumulate_biases; + CLIm2ColKernel _im2col_kernel; + CLFullyConnectedLayerReshapeWeights _reshape_weights_kernel; + CLGEMMInterleave4x4Kernel _interleave4x4_kernel; + CLGEMMMatrixMultiplyKernel _mm_kernel; + CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; + CLTensor _im2col_output; + CLTensor _interleave4x4_output; + CLTensor _reshape_weights_output; + bool _are_weights_reshaped; + bool _is_fc_after_conv; + bool _is_batched_fc_layer; + 
bool _accumulate_biases; }; } #endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h new file mode 100644 index 0000000..cdb23bf --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ +#define __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ + +#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLHOGGradient.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class IHOG; +/** Basic function to calculate HOG descriptor. 
This function calls the following OpenCL kernels: + * + * -# @ref CLHOGGradient + * -# @ref CLHOGOrientationBinningKernel + * -# @ref CLHOGBlockNormalizationKernel + * + */ +class CLHOGDescriptor : public IFunction +{ +public: + /** Default constructor */ + CLHOGDescriptor(); + /** Initialise the function's source, destination, HOG data-object and border mode + * + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block + * @param[in] hog HOG data object which describes the HOG descriptor + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited method overridden: + void run() override; + +private: + CLHOGGradient _gradient; + CLHOGOrientationBinningKernel _orient_bin; + CLHOGBlockNormalizationKernel _block_norm; + CLTensor _mag; + CLTensor _phase; + CLTensor _hog_space; +}; +} + +#endif /* __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h new file mode 100644 index 0000000..0b4fad7 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHOGDETECTOR_H__ +#define __ARM_COMPUTE_CLHOGDETECTOR_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +/** Basic function to execute HOG detector based on linear SVM. 
This function calls the following OpenCL kernel: + * + * -# @ref CLHOGDetectorKernel + * + */ +class CLHOGDetector : public IFunction +{ +public: + /** Default constructor */ + CLHOGDetector(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetector(const CLHOGDetector &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetector &operator=(const CLHOGDetector &) = delete; + /** Allow instances of this class to be moved */ + CLHOGDetector(CLHOGDetector &&) = default; + /** Allow instances of this class to be moved */ + CLHOGDetector &operator=(CLHOGDetector &&) = default; + /** Default destructor */ + ~CLHOGDetector() = default; + /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class + * + * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. + * + * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32 + * @param[in] hog HOG data-object that describes the HOG descriptor + * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
+ * It must be multiple of the block stride stored in hog + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0); + + // Inherited methods overridden: + void run() override; + +private: + CLHOGDetectorKernel _hog_detector_kernel; + ICLDetectionWindowArray *_detection_windows; + cl::Buffer _num_detection_windows; +}; +} + +#endif /* __ARM_COMPUTE_CLHOGDETECTOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h new file mode 100644 index 0000000..e74a684 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHOGGRADIENT_H__ +#define __ARM_COMPUTE_CLHOGGRADIENT_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLDerivative.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels: + * + * -# @ref CLDerivative + * -# @ref CLMagnitudePhaseKernel + * + */ +class CLHOGGradient : public IFunction +{ +public: + /** Default constructor */ + CLHOGGradient(); + /** Initialise the function's source, destinations, phase type and border mode + * + * @param[in, out] input Input tensor. Data type supported: U8. + * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16. + * @param[out] output_phase Output tensor.(phase). Format supported: U8 + * @param[in] phase_type Type of @ref PhaseType + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited method overridden: + void run() override; + +private: + CLDerivative _derivative; + CLMagnitudePhaseKernel _mag_phase; + CLTensor _gx; + CLTensor _gy; +}; +} +#endif /*__ARM_COMPUTE_CLHOGGRADIENT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h new file mode 100644 index 0000000..3fe0fa9 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ +#define __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLMultiHOG.h" +#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLHOGDetector.h" +#include "arm_compute/runtime/CL/functions/CLHOGGradient.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels: + * + * -# @ref CLHOGGradient + * -# @ref CLHOGOrientationBinningKernel + * -# @ref CLHOGBlockNormalizationKernel + * -# @ref CLHOGDetector + * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true) + * + * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same: + * -# Phase type + -# Normalization type + -# L2 hysteresis threshold if the normalization type is L2HYS_NORM + * + */ +class CLHOGMultiDetection : public IFunction +{ +public: + /** Default constructor */ + CLHOGMultiDetection(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGMultiDetection(const CLHOGMultiDetection &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete; + /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression + * + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[in] multi_hog Container of multiple HOG data object. 
Each HOG data object describes one HOG model to detect. + * This container should store the HOG data-objects in descending or ascending cell_size width order. + * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects + * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects + * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object + * The dimension of this array must be the same of multi_hog->num_models() + * The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not. 
+ * True if the non-maxima suppression stage has to be computed + * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage + * + */ + void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode, + uint8_t constant_border_value = 0, + float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); + + // Inherited method overridden: + void run() override; + +private: + CLHOGGradient _gradient_kernel; + std::unique_ptr _orient_bin_kernel; + std::unique_ptr _block_norm_kernel; + std::unique_ptr _hog_detect_kernel; + std::unique_ptr _non_maxima_kernel; + std::unique_ptr _hog_space; + std::unique_ptr _hog_norm_space; + ICLDetectionWindowArray *_detection_windows; + CLTensor _mag; + CLTensor _phase; + bool _non_maxima_suppression; + size_t _num_orient_bin_kernel; + size_t _num_block_norm_kernel; + size_t _num_hog_detect_kernel; +}; +} + +#endif /* __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h new file mode 100644 index 0000000..b4e4691 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" +#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to compute the locally connected layer. 
This function calls the following OpenCL kernels: + * + * -# @ref CLLocallyConnectedLayerWeightsReshapeKernel (executed only once for each configuration) + * -# @ref CLIm2ColKernel + * -# @ref CLLocallyConnectedMatrixMultiplyKernel + * -# @ref CLCol2ImKernel + */ +class CLLocallyConnectedLayer : public IFunction +{ +public: + /** Default constructor */ + CLLocallyConnectedLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + CLIm2ColKernel _input_im2col_kernel; + CLLocallyConnectedLayerWeightsReshapeKernel _weights_reshape_kernel; + CLLocallyConnectedMatrixMultiplyKernel _mm_kernel; + CLCol2ImKernel _output_col2im_kernel; + CLTensor _input_im2col_reshaped; + CLTensor _weights_reshaped; + CLTensor _gemm_output; + bool _is_first_run; +}; +} +#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h index 0828af6..7a37e5e 100644 --- a/arm_compute/runtime/CPP/CPPScheduler.h +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -24,35 +24,28 @@ #ifndef __ARM_COMPUTE_CPPSCHEDULER_H__ #define __ARM_COMPUTE_CPPSCHEDULER_H__ -#include +#include "arm_compute/runtime/IScheduler.h" + #include namespace arm_compute { -class ICPPKernel; class Thread; -/** Pool of threads to automatically split a kernel's execution among several threads. */ -class CPPScheduler +/** C++11 implementation of a pool of threads to automatically split a kernel's execution among several threads. */ +class CPPScheduler : public IScheduler { -private: - /** Constructor: create a pool of threads. */ - CPPScheduler(); - public: - /** Force the re-creation of the pool of threads to use the specified number of threads. + /** Sets the number of threads the scheduler will use to run the kernels. * - * @param[in] num_threads If set to 0, then std::thread::hardware_concurrency() threads will be used, otherwise the number of threads specified. + * @param[in] num_threads If set to 0, then the maximum number of threads supported by C++11 will be used, otherwise the number of threads specified. 
*/ - void force_number_of_threads(int num_threads); + void set_num_threads(unsigned int num_threads) override; /** Returns the number of threads that the CPPScheduler has in his pool. * * @return Number of threads available in CPPScheduler. */ - int num_threads() const - { - return _num_threads; - } + unsigned int num_threads() const override; /** Access the scheduler singleton * * @return The scheduler @@ -65,12 +58,15 @@ public: * - The scheduler has been initialized with only one thread. * * @param[in] kernel Kernel to execute. - * @param[in] split_dimension Dimension along which to split the kernel's execution window (By default 1/Y) + * @param[in] split_dimension Dimension along which to split the kernel's execution window. */ - void multithread(ICPPKernel *kernel, size_t split_dimension = 1); + void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; private: - int _num_threads; + /** Constructor: create a pool of threads. */ + CPPScheduler(); + + unsigned int _num_threads; std::unique_ptr _threads; }; } diff --git a/arm_compute/runtime/IFunction.h b/arm_compute/runtime/IFunction.h index 0cd21b9..a4e7ed1 100644 --- a/arm_compute/runtime/IFunction.h +++ b/arm_compute/runtime/IFunction.h @@ -36,7 +36,7 @@ public: * - Multi-threading is used for the kernels which are parallelisable. * - By default std::thread::hardware_concurrency() threads are used. * - * @note @ref CPPScheduler::force_number_of_threads() can be used to manually set the number of threads + * @note @ref CPPScheduler::set_num_threads() can be used to manually set the number of threads * * For OpenCL kernels: * - All the kernels are enqueued on the queue associated with CLScheduler. diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h new file mode 100644 index 0000000..39c027c --- /dev/null +++ b/arm_compute/runtime/IScheduler.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ISCHEDULER_H__ +#define __ARM_COMPUTE_ISCHEDULER_H__ + +namespace arm_compute +{ +class ICPPKernel; + +/** Scheduler interface to run kernels */ +class IScheduler +{ +public: + /** Destructor. */ + virtual ~IScheduler() = default; + /** Sets the number of threads the scheduler will use to run the kernels. + * + * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified. + */ + virtual void set_num_threads(unsigned int num_threads) = 0; + /** Returns the number of threads that the SingleThreadScheduler has in his pool. + * + * @return Number of threads available in SingleThreadScheduler. + */ + virtual unsigned int num_threads() const = 0; + /** Runs the kernel in the same thread as the caller synchronously. 
+ * + * @param[in] kernel Kernel to execute. + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + */ + virtual void schedule(ICPPKernel *kernel, unsigned int split_dimension) = 0; +}; +} +#endif /* __ARM_COMPUTE_ISCHEDULER_H__ */ diff --git a/arm_compute/runtime/MultiHOG.h b/arm_compute/runtime/MultiHOG.h index 486ae14..32bad70 100644 --- a/arm_compute/runtime/MultiHOG.h +++ b/arm_compute/runtime/MultiHOG.h @@ -29,6 +29,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/HOG.h" +#include + namespace arm_compute { /** CPU implementation of multi HOG data-object */ diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index ef17599..daf76f3 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -30,6 +30,7 @@ #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" #include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h" #include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" @@ -41,9 +42,11 @@ #include "arm_compute/runtime/NEON/functions/NEColorConvert.h" #include "arm_compute/runtime/NEON/functions/NEConvolution.h" #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" #include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" #include "arm_compute/runtime/NEON/functions/NEDerivative.h" #include "arm_compute/runtime/NEON/functions/NEDilate.h" +#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h" #include "arm_compute/runtime/NEON/functions/NEErode.h" #include 
"arm_compute/runtime/NEON/functions/NEFastCorners.h" @@ -65,6 +68,7 @@ #include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" #include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" #include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" +#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEMagnitude.h" #include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" #include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" diff --git a/arm_compute/runtime/NEON/NEScheduler.h b/arm_compute/runtime/NEON/NEScheduler.h index c65d6b7..94c82b2 100644 --- a/arm_compute/runtime/NEON/NEScheduler.h +++ b/arm_compute/runtime/NEON/NEScheduler.h @@ -24,10 +24,10 @@ #ifndef __ARM_COMPUTE_NESCHEDULER_H__ #define __ARM_COMPUTE_NESCHEDULER_H__ -#include "arm_compute/runtime/CPP/CPPScheduler.h" +#include "arm_compute/runtime/Scheduler.h" namespace arm_compute { -using NEScheduler = CPPScheduler; +using NEScheduler = Scheduler; } #endif /*__ARM_COMPUTE_NESCHEDULER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index 3fb3e20..35366e1 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -41,7 +41,7 @@ class NEActivationLayer : public INESimpleFunction public: /** Set the input and output tensor. * - * @param[in] input Source tensor. Data type supported: F32. + * @param[in] input Source tensor. Data type supported: QS8/F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] activation_info Activation layer parameters. 
*/ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index 8f66a6d..8e34e98 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -35,11 +35,11 @@ class ITensor; class NEArithmeticAddition : public INESimpleFunction { public: - /** Initialise the kernel's inputs, output and convertion policy. + /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in] input1 First tensor input. Data types supported: U8 or S16. - * @param[in] input2 Second tensor input. Data types supported: U8 or S16. - * @param[out] output Output tensor. Data types supported: U8 or S16. + * @param[in] input1 First tensor input. Data types supported: U8/S16. + * @param[in] input2 Second tensor input. Data types supported: U8/S16. + * @param[out] output Output tensor. Data types supported: U8/S16. * @param[in] policy Policy to use to handle overflow. */ void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h index d0eaff7..841b591 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -35,11 +35,11 @@ class ITensor; class NEArithmeticSubtraction : public INESimpleFunction { public: - /** Initialise the kernel's inputs, output and convertion policy. + /** Initialise the kernel's inputs, output and conversion policy. * - * @param[in] input1 First tensor input. Data types supported: U8 or S16. - * @param[in] input2 Second tensor input. Data types supported: U8 or S16. - * @param[out] output Output tensor. Data types supported: U8 or S16. + * @param[in] input1 First tensor input. Data types supported: U8/S16. + * @param[in] input2 Second tensor input. 
Data types supported: U8/S16. + * @param[out] output Output tensor. Data types supported: U8/S16. * @param[in] policy Policy to use to handle overflow. */ void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h new file mode 100644 index 0000000..b0b5c12 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ + +#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NENormalizationLayerKernel and simulate a batch normalization layer. + * + * Batch normalization is calculated by: + * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f] + * + */ +class NEBatchNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + NEBatchNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/F32. + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. 
Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); + + // Inherited methods overridden: + void run() override; + +private: + NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */ +}; +} +#endif /* __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h index 5c80977..1704d9f 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolution.h +++ b/arm_compute/runtime/NEON/functions/NEConvolution.h @@ -49,7 +49,7 @@ public: /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[out] output Destination tensor, Data types supported: U8/S16. * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. * @param[in] border_mode Strategy to use for borders. 
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index a6862ca..a8fff8d 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -27,12 +27,12 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h" #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Tensor.h" @@ -40,11 +40,38 @@ namespace arm_compute { class ITensor; -/** Basic function to simulate a convolution layer. This function calls the following OpenCL kernels: - * -# @ref NEConvolutionLayerWeightsReshapeKernel (executed only once for each configuration) - * -# @ref NEGEMMTranspose1xWKernel (executed only once for each configuration) +/** Function to reshape and perform 1xW transposition on the weights. This function calls the following kernels: + * -# @ref NEWeightsReshapeKernel + * -# @ref NEGEMMTranspose1xWKernel (executed in case GEMM is required for the operation) + */ +class NEConvolutionLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + NEConvolutionLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QS8/F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. 
+ * @param[out] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise. + * Data types supported: Same as @p weights. + */ + void configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW); + // Inherited methods overridden: + void run() override; + +private: + NEWeightsReshapeKernel _weights_reshape_kernel; + NEGEMMTranspose1xWKernel _weights_transposed_kernel; + Tensor _weights_reshaped; + bool _transpose1xW; +}; + +/** Basic function to simulate a convolution layer. This function calls the following NEON kernels: + * -# @ref NEWeightsReshapeKernel (executed only once for each configuration) * -# @ref NEIm2ColKernel - * -# @ref NEGEMMInterleave4x4Kernel + * -# @ref NEGEMMInterleave4x4Kernel (executed only in case GEMM is required for the operation) * -# @ref NEGEMMMatrixMultiplyKernel * -# @ref NECol2ImKernel */ @@ -55,34 +82,34 @@ public: NEConvolutionLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QS8/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. 
*/ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); - + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); // Inherited methods overridden: void run() override; private: - NEIm2ColKernel _input_im2col_kernel; - NEGEMMInterleave4x4Kernel _input_interleave_kernel; - NEConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel; - NEGEMMTranspose1xWKernel _weights_transposed_kernel; - NEGEMMMatrixMultiplyKernel _mm_kernel; - NECol2ImKernel _output_col2im_kernel; - Tensor _input_im2col_reshaped; - Tensor _input_interleaved_reshaped; - Tensor _weights_reshaped; - Tensor _weights_transposed; - Tensor _gemm_output; - bool _is_first_run; - bool _has_bias; + NEIm2ColKernel _input_im2col_kernel; + NEGEMMInterleave4x4Kernel _input_interleave_kernel; + NEConvolutionLayerReshapeWeights _reshape_weights; + NEGEMMMatrixMultiplyKernel _mm_kernel; + NECol2ImKernel _output_col2im_kernel; + Tensor _input_im2col_reshaped; + Tensor _input_interleaved_reshaped; + Tensor _weights_reshaped; + Tensor _gemm_output; + bool _has_bias; + bool _is_fully_connected_convolution; + bool _are_weights_reshaped; }; } #endif /* __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h new file mode 100644 index 0000000..02ff122 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ +#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include <memory> +#include <vector> + +namespace arm_compute +{ +class ITensor; +class NEDepthConcatenateKernel; +class NEFillBorderKernel; + +/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: + * + * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) + * -# @ref NEDepthConcatenateKernel + * + */ +class NEDepthConcatenate : public IFunction +{ +public: + /** Default constructor */ + NEDepthConcatenate(); + /** Initialise the kernel's inputs vector and output. + * + * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: F32. 
+ * @param[out] output Output tensor. Data types supported: F32. + */ + void configure(std::vector<ITensor *> inputs_vector, ITensor *output); + + // Inherited methods overridden: + void run() override; + +private: + std::vector<ITensor *> _inputs_vector; + std::unique_ptr<NEDepthConcatenateKernel[]> _concat_kernels_vector; + std::unique_ptr<NEFillBorderKernel[]> _border_handlers_vector; + unsigned int _num_inputs; +}; +} +#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvert.h b/arm_compute/runtime/NEON/functions/NEDepthConvert.h index 21ccca3..7c59ce4 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthConvert.h +++ b/arm_compute/runtime/NEON/functions/NEDepthConvert.h @@ -48,17 +48,18 @@ public: * Input format must be different than output format. * * Valid conversions Input -> Output : - * U8 -> U16, S16, U32, S32 - * U16 -> U8, U32, S32 - * S16 -> U8, U32, S32 - * U32 -> U8, U16, S16 - * S32 -> U8, U16, S16 + * QS8 -> F32 + * U8 -> U16, S16, S32 + * U16 -> U8, U32 + * S16 -> U8, S32 + * F32 -> QS8 * * - * @param[in] input The input tensor to convert. Data type supported: U8, U16, S16, U32 or S32. - * @param[out] output The output tensor. Data type supported: U8, U16, S16, U32 or S32. + * @param[in] input The input tensor to convert. Data type supported: QS8/U8/U16/S16/F32. + * @param[out] output The output tensor. Data type supported: QS8/U8/U16/S16/U32/S32/F32. * @param[in] policy Conversion policy. * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * It is not used on fixed point conversion. */ void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift); }; diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h new file mode 100644 index 0000000..a356cac --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ + +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +/** Function to run the direct convolution. 
+ * + * This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel for the input + * -# @ref NEDirectConvolutionLayerBiasAccumulateKernel + * -# @ref NEDirectConvolutionLayerKernel + */ +class NEDirectConvolutionLayer : public IFunction +{ +public: + /** Constructor */ + NEDirectConvolutionLayer(); + /** Set the input, weights, biases and output tensors. + * + * @param[in, out] input Input tensor. Data types supported: QS8/F32. + * @param[in] weights Set of kernels to convolve the input volume. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported: Same as @p input. + * @param[in] bias Set of biases. Data type supported: Same as @p input. + * @param[out] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + NEDirectConvolutionLayerBiasAccumulateKernel _accumulate_bias_kernel; + NEDirectConvolutionLayerKernel _conv_kernel; + NEFillBorderKernel _input_border_handler; + Tensor _accumulator; +}; +} +#endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h index c69c285..b6b7e77 100644 --- a/arm_compute/runtime/NEON/functions/NEFillBorder.h +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -41,7 +41,7 @@ public: * * @note This function fills the borders within the XY-planes. * - * @param[in, out] input Source tensor. Data type supported: U8, S16, S32, F32 + * @param[in, out] input Source tensor. 
Data type supported: U8/QS8/S16/S32/F32 * @param[in] border_width Width of the tensor border in pixels. * @param[in] border_mode Strategy to use for borders. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 69e27b8..33ec4ef 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -36,10 +36,41 @@ namespace arm_compute { +/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels: + * + * -# @ref NETransposeKernel (if @p transpose_weights is set to true) + * -# @ref NEGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class NEFullyConnectedLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + NEFullyConnectedLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights. + * @param[in] is_batched_fc_layer True if it is a batched fully connected layer + */ + void configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer); + + // Inherited methods overridden: + void run() override; + +private: + NETransposeKernel _transpose_kernel; + NEGEMMTranspose1xWKernel _transpose1xW_kernel; + Tensor _transpose_output; + bool _transpose_weights; + bool _is_batched_fc_layer; +}; + /** Basic function to compute a Fully Connected layer on NEON. 
This function calls the following NEON kernels: - * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) - * -# @ref NETransposeKernel (if @p transpose_weights flag is set to true) (called once) - * -# @ref NEGEMMTranspose1xWKernel (called once if we have a multi-batch input) + * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) + * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped flag is set to false) (called once) * -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input) * -# @ref NEGEMMMatrixMultiplyKernel * -# @ref NEGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr) @@ -53,13 +84,14 @@ public: NEFullyConnectedLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. + * @param[in] input Source tensor. Data type supported: QS8/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input. + * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights (Optional) Transpose the weights tensor if true. Defaults to true. + * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. 
*/ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true); + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true, bool are_weights_reshaped = false); //Inherited methods override void run() override; @@ -70,21 +102,18 @@ private: void configure_conv_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output); void configure_conv_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output); - NEIm2ColKernel _im2col_kernel; - NETransposeKernel _transpose_kernel; - NEGEMMTranspose1xWKernel _transpose1xW_kernel; - NEGEMMInterleave4x4Kernel _interleave4x4_kernel; - NEGEMMMatrixMultiplyKernel _mm_kernel; - NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; - Tensor _im2col_output; - Tensor _interleave4x4_output; - Tensor _transpose_output; - Tensor _transpose1xW_output; - bool _is_first_run; - bool _transpose_weights; - bool _fc_after_conv; - bool _batched_fc_layer; - bool _accumulate_biases; + NEIm2ColKernel _im2col_kernel; + NEFullyConnectedLayerReshapeWeights _reshape_weights_kernel; + NEGEMMInterleave4x4Kernel _interleave4x4_kernel; + NEGEMMMatrixMultiplyKernel _mm_kernel; + NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; + Tensor _im2col_output; + Tensor _interleave4x4_output; + Tensor _reshape_weights_output; + bool _are_weights_reshaped; + bool _is_fc_after_conv; + bool _is_batched_fc_layer; + bool _accumulate_biases; }; } #endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index b9346e7..a40aa91 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -50,9 +50,9 @@ public: /** Initialise the kernel's inputs, output * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. 
- * @note GEMM: The tensors a, b, c, d must have the same data type. All are either F32 or F16. You should not mix data types when calling this function. + * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function. * - * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: F32, F16. + * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: QS8/F16/F32 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a * @param[out] d Output tensor. Data type supported: same as @p a diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h index 71fefbf..b911fd0 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h @@ -40,7 +40,7 @@ class NEGEMMInterleave4x4 : public INESimpleFunction public: /** Initialise the kernel's inputs, output * - * @param[in] input First input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input First input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor. Data type supported: same as @p input */ void configure(const ITensor *input, ITensor *output); diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h index 69096fb..447b8c9 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -38,7 +38,7 @@ class NEGEMMTranspose1xW : public INESimpleFunction public: /** Initialise the kernel's inputs, output * - * @param[in] input First input tensor. Data type supported: F32, F16, U8. 
+ * @param[in] input First input tensor. Data type supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: same as @p input */ void configure(const ITensor *input, ITensor *output); diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h index 7487f66..699e42e 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h +++ b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h @@ -64,7 +64,7 @@ public: protected: NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */ NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */ - Tensor _tmp; /** temporary buffer for output of horizontal pass */ + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */ }; } diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h index 46ab72c..98b8a89 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h +++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h @@ -40,6 +40,8 @@ class NEHOGDetector : public INESimpleFunction public: /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class * + * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is the caller's responsibility to clear it. + * + * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. 
Data type supported: F32 * @param[in] hog HOG data-object that describes the HOG descriptor * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h index 9440ee0..2d07e64 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h +++ b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h @@ -24,10 +24,10 @@ #ifndef __ARM_COMPUTE_NEHOGMULTIDETECTION_H__ #define __ARM_COMPUTE_NEHOGMULTIDETECTION_H__ +#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" #include "arm_compute/core/IArray.h" #include "arm_compute/core/IMultiHOG.h" #include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" -#include "arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" #include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" @@ -41,7 +41,7 @@ namespace arm_compute * -# @ref NEHOGOrientationBinningKernel * -# @ref NEHOGBlockNormalizationKernel * -# @ref NEHOGDetector - * -# @ref NEHOGNonMaximaSuppressionKernel (executed if non_maxima_suppression == true) + * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true) * * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same: * -# Phase type @@ -85,20 +85,20 @@ public: void run() override; private: - NEHOGGradient _gradient_kernel; - std::unique_ptr _orient_bin_kernel; - std::unique_ptr _block_norm_kernel; - std::unique_ptr _hog_detect_kernel; - std::unique_ptr _non_maxima_kernel; - std::unique_ptr _hog_space; - std::unique_ptr _hog_norm_space; - IDetectionWindowArray *_detection_windows; - Tensor _mag; - Tensor _phase; - bool _non_maxima_suppression; - size_t _num_orient_bin_kernel; - size_t 
_num_block_norm_kernel; - size_t _num_hog_detect_kernel; + NEHOGGradient _gradient_kernel; + std::unique_ptr _orient_bin_kernel; + std::unique_ptr _block_norm_kernel; + std::unique_ptr _hog_detect_kernel; + std::unique_ptr _non_maxima_kernel; + std::unique_ptr _hog_space; + std::unique_ptr _hog_norm_space; + IDetectionWindowArray *_detection_windows; + Tensor _mag; + Tensor _phase; + bool _non_maxima_suppression; + size_t _num_orient_bin_kernel; + size_t _num_block_norm_kernel; + size_t _num_hog_detect_kernel; }; } diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h new file mode 100644 index 0000000..1b2b2ee --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class INETensor; + +/** Basic function to compute the locally connected layer. This function calls the following NEON kernels: + * + * -# @ref NEWeightsReshapeKernel (executed only once for each configuration) + * -# @ref NEIm2ColKernel + * -# @ref NELocallyConnectedMatrixMultiplyKernel + * -# @ref NECol2ImKernel + */ +class NELocallyConnectedLayer : public IFunction +{ +public: + /** Default constructor */ + NELocallyConnectedLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ */ + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + NEIm2ColKernel _input_im2col_kernel; + NEWeightsReshapeKernel _weights_reshape_kernel; + NELocallyConnectedMatrixMultiplyKernel _mm_kernel; + NECol2ImKernel _output_col2im_kernel; + Tensor _input_im2col_reshaped; + Tensor _weights_reshaped; + Tensor _gemm_output; + bool _is_first_run; +}; +} +#endif /* __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h index e60349a..82e75ee 100644 --- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h +++ b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h @@ -48,7 +48,7 @@ public: NEMinMaxLocation(); /** Initialise the kernel's inputs and outputs. * - * @param[in] input Input image. Data types supported: U8 or S16. + * @param[in] input Input image. Data types supported: U8/S16. * @param[out] min Minimum value of image. * @param[out] max Maximum value of image. * @param[out] min_loc (Optional) Array of minimum value locations. diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h index 06e4f08..c87d722 100644 --- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h +++ b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h @@ -45,7 +45,7 @@ public: * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT * The constant values used with CONSTANT border mode is 0 * - * @param[in, out] input Source tensor. Data type supported: U8, F32. (Written to only for @p border_mode != UNDEFINED) + * @param[in, out] input Source tensor. Data type supported: U8/F32. 
(Written to only for @p border_mode != UNDEFINED) * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data type supported: same as @p input * @param[in] border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT * diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h index b7be34d..3202867 100644 --- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -52,7 +52,7 @@ public: /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data type supported: F32. Number of channels must be 1. + * and an optional 4th dimension for batch of inputs. Data type supported: QS8/F32 * @param[out] output Destination with the same dimensions, data type and number of channels of @p input * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. */ diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h index 835bd13..de7a797 100644 --- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h +++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h @@ -37,9 +37,9 @@ class NEPixelWiseMultiplication : public INESimpleFunction public: /** Initialise the kernel's inputs, output and convertion policy. * - * @param[in] input1 First tensor input. Data types supported: U8 or S16. - * @param[in] input2 Second tensor input. Data types supported: U8 or S16. - * @param[out] output Output tensor. Data types supported: U8 or S16. + * @param[in] input1 First tensor input. Data types supported: U8/QS8/S16/F32. 
+ * @param[in] input2 Second tensor input. Data types supported: U8/QS8/S16/F32. + * @param[out] output Output tensor. Data types supported: U8/QS8/S16/F32. * @param[in] scale Scale to apply after multiplication. Must be positive. * @param[in] overflow_policy Overflow policy. * @param[in] rounding_policy Rounding policy. diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index 5d67830..5a9cffa 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -42,7 +42,7 @@ class NEPoolingLayer : public INESimpleFunction public: /** Set the input and output tensors. * - * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: F32. + * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index c67b667..dc84dec 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -50,7 +50,7 @@ public: NESoftmaxLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. + * @param[in] input Source tensor. Data types supported: QS8/F32. * @param[out] output Destination tensor. Data types supported: same as @p input. 
*/ void configure(ITensor *input, ITensor *output); @@ -63,7 +63,6 @@ private: NELogits1DShiftExpSumKernel _shift_exp_sum_kernel; NELogits1DNormKernel _norm_kernel; NEFillBorderKernel _fill_border_kernel; - NEFillBorderKernel _fill_border_kernel_sum; Tensor _max; Tensor _sum; Tensor _tmp; diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h index d2f9d30..b59ffb8 100644 --- a/arm_compute/runtime/NEON/functions/NETableLookup.h +++ b/arm_compute/runtime/NEON/functions/NETableLookup.h @@ -37,9 +37,9 @@ class NETableLookup : public INESimpleFunction public: /** Initialise the kernel's inputs and output * - * @param[in] input First tensor input. Data types supported: U8 and S16 + * @param[in] input First tensor input. Data types supported: U8/S16 * @param[in] lut Input lookup table. - * @param[out] output Output tensor. Data types supported: U8 and S16. + * @param[out] output Output tensor. Data types supported: same as @p input */ void configure(const ITensor *input, const ILut *lut, ITensor *output); }; diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 1b88715..4b606e7 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -41,7 +41,7 @@ class NETranspose : public INESimpleFunction public: /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 * @param[out] output Output tensor. 
Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h new file mode 100644 index 0000000..21df6a6 --- /dev/null +++ b/arm_compute/runtime/OMP/OMPScheduler.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_OMPSCHEDULER_H__ +#define __ARM_COMPUTE_OMPSCHEDULER_H__ + +#include "arm_compute/runtime/IScheduler.h" + +namespace arm_compute +{ +/** Pool of threads to automatically split a kernel's execution among several threads. */ +class OMPScheduler : public IScheduler +{ +public: + /** Sets the number of threads the scheduler will use to run the kernels. 
+ * + * @param[in] num_threads If set to 0, then the number returned by omp_get_max_threads() will be used, otherwise the number of threads specified. + */ + void set_num_threads(unsigned int num_threads) override; + /** Returns the number of threads that the OMPScheduler has in its pool. + * + * @return Number of threads available in OMPScheduler. + */ + unsigned int num_threads() const override; + /** Access the scheduler singleton + * + * @return The scheduler + */ + static OMPScheduler &get(); + /** Multithread the execution of the passed kernel if possible. + * + * The kernel will run on a single thread if any of these conditions is true: + * - ICPPKernel::is_parallelisable() returns false + * - The scheduler has been initialized with only one thread. + * + * @param[in] kernel Kernel to execute. + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + */ + void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; + +private: + /** Constructor. */ + OMPScheduler(); + + unsigned int _num_threads; +}; +} +#endif /* __ARM_COMPUTE_OMPSCHEDULER_H__ */ diff --git a/arm_compute/runtime/Scheduler.h b/arm_compute/runtime/Scheduler.h new file mode 100644 index 0000000..21f944b --- /dev/null +++ b/arm_compute/runtime/Scheduler.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_SCHEDULER_H__ +#define __ARM_COMPUTE_SCHEDULER_H__ + +#include "arm_compute/runtime/IScheduler.h" +#include + +namespace arm_compute +{ +/** Configurable scheduler which supports multiple multithreading APIs and choosing between different schedulers at runtime. */ +class Scheduler +{ +public: + enum class Type + { + ST, // Single thread. + CPP, // C++11 threads. + OMP, // OpenMP. + CUSTOM // Provided by the user. + }; + /** Sets the user defined scheduler and makes it the active scheduler. + * + * @param[in] scheduler A shared pointer to a custom scheduler implemented by the user. + */ + static void set(std::shared_ptr &scheduler); + /** Access the scheduler singleton. + * + * @return A reference to the scheduler object. + */ + static IScheduler &get(); + /** Set the active scheduler. + * + * Only one scheduler can be enabled at any time. + * + * @param[in] t the type of the scheduler to be enabled. + */ + static void set(Type t); + /** Returns the type of the active scheduler. + * + * @return The current scheduler's type. + */ + static Type get_type(); + /** Returns true if the given scheduler type is supported. False otherwise. + * + * @return true if the given scheduler type is supported. False otherwise. 
+ */ + static bool is_available(Type t); + +private: + static Type _scheduler_type; + static std::shared_ptr _custom_scheduler; + Scheduler(); +}; +} +#endif /* __ARM_COMPUTE_SCHEDULER_H__ */ diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h new file mode 100644 index 0000000..a6e1def --- /dev/null +++ b/arm_compute/runtime/SingleThreadScheduler.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__ +#define __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__ + +#include "arm_compute/runtime/IScheduler.h" + +namespace arm_compute +{ +/** Pool of threads to automatically split a kernel's execution among several threads. */ +class SingleThreadScheduler : public IScheduler +{ +public: + /** Sets the number of threads the scheduler will use to run the kernels. 
+ * + * @param[in] num_threads This is ignored for this scheduler as the number of threads is always one. + */ + void set_num_threads(unsigned int num_threads) override; + /** Returns the number of threads that the SingleThreadScheduler has, which is always 1. + * + * @return Number of threads available in SingleThreadScheduler. + */ + unsigned int num_threads() const override; + /** Access the scheduler singleton + * + * @return The scheduler + */ + static SingleThreadScheduler &get(); + /** Runs the kernel in the same thread as the caller synchronously. + * + * @param[in] kernel Kernel to execute. + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + */ + void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; + +private: + /** Constructor. */ + SingleThreadScheduler() = default; +}; +} +#endif /* __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__ */ diff --git a/arm_compute/runtime/SubTensor.h b/arm_compute/runtime/SubTensor.h new file mode 100644 index 0000000..bdb229d --- /dev/null +++ b/arm_compute/runtime/SubTensor.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_SUBTENSOR_H__ +#define __ARM_COMPUTE_SUBTENSOR_H__ + +#include "arm_compute/core/SubTensorInfo.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensorInfo; + +/** Basic implementation of the sub-tensor interface */ +class SubTensor : public ITensor +{ +public: + /** Constructor + * + * @param[in] parent Parent tensor + * @param[in] tensor_shape Shape of the subtensor + * @param[in] coords Coordinates of the first subtensor element inside the parent tensor. + */ + SubTensor(ITensor *parent, const TensorShape &tensor_shape, const Coordinates &coords); + /** Destructor: free the tensor's memory */ + ~SubTensor() = default; + /** Restrict instances of this class to be copy constructed */ + SubTensor(const SubTensor &) = delete; + /** Restrict instances of this class to be copied */ + SubTensor &operator=(const SubTensor &) = delete; + /** Allow instances of this class to be move constructed */ + SubTensor(SubTensor &&) = default; + /** Allow instances of this class to be moved */ + SubTensor &operator=(SubTensor &&) = default; + /** Return the parent tensor of the subtensor + * + * @return Parent tensor + */ + ITensor *parent(); + + // Inherited methods overridden: + ITensorInfo *info() const override; + ITensorInfo *info() override; + uint8_t *buffer() const override; + +private: + ITensor *_parent; + mutable SubTensorInfo _info; +}; +} +#endif /*__ARM_COMPUTE_SUBTENSOR_H__ */ diff --git a/arm_compute/runtime/Tensor.h b/arm_compute/runtime/Tensor.h index e491635..1fe73a2 100644 --- a/arm_compute/runtime/Tensor.h +++ b/arm_compute/runtime/Tensor.h @@ -31,7 +31,7 @@ namespace arm_compute { -class TensorInfo; +class ITensorInfo; 
/** Basic implementation of the tensor interface */ class Tensor : public ITensor @@ -52,9 +52,9 @@ public: TensorAllocator *allocator(); // Inherited methods overridden: - TensorInfo *info() const override; - TensorInfo *info() override; - uint8_t *buffer() const override; + ITensorInfo *info() const override; + ITensorInfo *info() override; + uint8_t *buffer() const override; private: mutable TensorAllocator _allocator; /**< Instance of the basic CPU allocator.*/ diff --git a/arm_compute/runtime/Utils.h b/arm_compute/runtime/Utils.h new file mode 100644 index 0000000..2f037a0 --- /dev/null +++ b/arm_compute/runtime/Utils.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_RUNTIME_UTILS_H__ +#define __ARM_COMPUTE_RUNTIME_UTILS_H__ + +#include "arm_compute/runtime/Scheduler.h" + +#include + +namespace arm_compute +{ +/** Convert a Scheduler::Type into a string. + * + * @param[in] t @ref Scheduler::Type to be translated to string. + * + * @return The string describing the scheduler type. + */ +const std::string &string_from_scheduler_type(Scheduler::Type t); +} +#endif /* __ARM_COMPUTE_RUNTIME_UTILS_H__ */ diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox new file mode 100644 index 0000000..1fb94ed --- /dev/null +++ b/docs/00_introduction.dox @@ -0,0 +1,563 @@ +/** @mainpage Introduction + +@tableofcontents + +The Computer Vision and Machine Learning library is a set of functions optimised for both ARM CPUs and GPUs using SIMD technologies. + +Several builds of the library are available using various configurations: + - OS: Linux, Android or bare metal. + - Architecture: armv7a (32bit) or arm64-v8a (64bit) + - Technology: NEON / OpenCL / NEON and OpenCL + - Debug / Asserts / Release: Use a build with asserts enabled to debug your application and enable extra validation. Once you are sure your application works as expected you can switch to a release build of the library for maximum performance. + +@section S0_1_contact Contact / Support + +Please email developer@arm.com + +In order to facilitate the work of the support team please provide the build information of the library you are using. 
To get the version of the library you are using simply run: + + $ strings android-armv7a-cl-asserts/libarm_compute.so | grep arm_compute_version + arm_compute_version=v16.12 Build options: {'embed_kernels': '1', 'opencl': '1', 'arch': 'armv7a', 'neon': '0', 'asserts': '1', 'debug': '0', 'os': 'android', 'Werror': '1'} Git hash=f51a545d4ea12a9059fe4e598a092f1fd06dc858 + +@section S1_file_organisation File organisation + +This archive contains: + - The arm_compute header and source files + - The latest Khronos OpenCL 1.2 C headers from the Khronos OpenCL registry + - The latest Khronos cl2.hpp from the Khronos OpenCL registry (API version 2.1 when this document was written) + - The sources for a stub version of libOpenCL.so to help you build your application. + - An examples folder containing a few examples to compile and link against the library. + - A @ref utils folder containing headers with some boiler plate code used by the examples. + - This documentation. + +You should have the following file organisation: + + . + ├── arm_compute --> All the arm_compute headers + │   ├── core + │   │   ├── CL + │   │   │   ├── CLKernels.h --> Includes all the OpenCL kernels at once + │   │   │   ├── CL specialisation of all the generic objects interfaces (ICLTensor, ICLImage, etc.) + │   │   │   ├── kernels --> Folder containing all the OpenCL kernels + │   │   │   │   └── CL*Kernel.h + │   │   │   └── OpenCL.h --> Wrapper to configure the Khronos OpenCL C++ header + │   │ ├── CPP + │   │ │   └── kernels --> Folder containing all the CPP kernels + │   │   │   │   └── CPP*Kernel.h + │   │   ├── NEON + │   │   │   ├── kernels --> Folder containing all the NEON kernels + │   │   │   │   └── NE*Kernel.h + │   │   │   └── NEKernels.h --> Includes all the NEON kernels at once + │   │   ├── All common basic types (Types.h, Window, Coordinates, Iterator, etc.) + │   │   ├── All generic objects interfaces (ITensor, IImage, etc.) 
+ │   │   └── Objects metadata classes (ImageInfo, TensorInfo, MultiImageInfo) + │   └── runtime + │   ├── CL + │   │   ├── CL objects & allocators (CLArray, CLImage, CLTensor, etc.) + │   │   ├── functions --> Folder containing all the OpenCL functions + │   │   │   └── CL*.h + │   │   └── CLFunctions.h --> Includes all the OpenCL functions at once + │   ├── CPP + │   │   └── Scheduler.h --> Basic pool of threads to execute CPP/NEON code on several cores in parallel + │   ├── NEON + │   │ ├── functions --> Folder containing all the NEON functions + │   │ │   └── NE*.h + │   │ └── NEFunctions.h --> Includes all the NEON functions at once + │   └── Basic implementations of the generic object interfaces (Array, Image, Tensor, etc.) + ├── documentation + │   ├── index.xhtml + │   └── ... + ├── documentation.xhtml -> documentation/index.xhtml + ├── examples + │   ├── cl_convolution.cpp + │   ├── neoncl_scale_median_gaussian.cpp + │   ├── neon_convolution.cpp + │   └── neon_scale.cpp + ├── include + │   └── CL + │   └── Khronos OpenCL C headers and C++ wrapper + ├── opencl-1.2-stubs + │ └── opencl_stubs.c + ├── src + │   ├── core + │ │ └── ... (Same structure as headers) + │   │ └── CL + │   │ └── cl_kernels --> All the OpenCL kernels + │ └── runtime + │ └── ... 
(Same structure as headers) + ├── tests + │   ├── All test related files shared between validation and benchmark + │   ├── CL --> OpenCL specific files (shared) + │   ├── NEON --> NEON specific files (shared) + │   ├── benchmark --> Sources for benchmarking + │ │ ├── Benchmark specific files + │ │ ├── main.cpp --> Entry point for benchmark test framework + │ │ ├── CL --> OpenCL benchmarking tests + │ │ └── NEON --> NEON benchmarking tests + │   ├── validation --> Sources for validation + │ │ ├── Validation specific files + │ │ ├── main.cpp --> Entry point for validation test framework + │ │ ├── CL --> OpenCL validation tests + │ │ ├── NEON --> NEON validation tests + │ │ └── UNIT --> Library validation tests + │   └── dataset --> Datasets defining common sets of input parameters + └── utils --> Boiler plate code used by examples + └── Utils.h + +@section S2_versions_changelog Release versions and changelog + +@subsection S2_1_versions Release versions + +All releases are numbered vYY.MM Where YY are the last two digits of the year, and MM the month number. +If there is more than one release in a month then an extra sequential number is appended at the end: + + v17.03 (First release of March 2017) + v17.03.1 (Second release of March 2017) + v17.04 (First release of April 2017) + +@note We're aiming at releasing one major public release with new features per quarter. All releases in between will only contain bug fixes. + +@subsection S2_2_changelog Changelog + +v17.06 Public major release + - Various bug fixes + - Added support for fixed point 8 bit (QS8) to the various NEON machine learning kernels. + - Added unit tests and benchmarks (AlexNet, LeNet) + - Added support for sub tensors. + - Added infrastructure to provide GPU specific optimisation for some OpenCL kernels. 
+ - Added @ref arm_compute::OMPScheduler (OpenMP) scheduler for NEON + - Added @ref arm_compute::SingleThreadScheduler scheduler for NEON (For bare metal) + - User can specify his own scheduler by implementing the @ref arm_compute::IScheduler interface. + - New OpenCL kernels / functions: + - @ref arm_compute::CLBatchNormalizationLayerKernel / @ref arm_compute::CLBatchNormalizationLayer + - @ref arm_compute::CLDepthConcatenateKernel / @ref arm_compute::CLDepthConcatenate + - @ref arm_compute::CLHOGOrientationBinningKernel @ref arm_compute::CLHOGBlockNormalizationKernel, @ref arm_compute::CLHOGDetectorKernel / @ref arm_compute::CLHOGDescriptor @ref arm_compute::CLHOGDetector @ref arm_compute::CLHOGGradient @ref arm_compute::CLHOGMultiDetection + - @ref arm_compute::CLLocallyConnectedMatrixMultiplyKernel / @ref arm_compute::CLLocallyConnectedLayer + - @ref arm_compute::CLWeightsReshapeKernel / @ref arm_compute::CLConvolutionLayerReshapeWeights + - New C++ kernels: + - @ref arm_compute::CPPDetectionWindowNonMaximaSuppressionKernel + - New NEON kernels / functions: + - @ref arm_compute::NEBatchNormalizationLayerKernel / @ref arm_compute::NEBatchNormalizationLayer + - @ref arm_compute::NEDepthConcatenateKernel / @ref arm_compute::NEDepthConcatenate + - @ref arm_compute::NEDirectConvolutionLayerKernel / @ref arm_compute::NEDirectConvolutionLayer + - @ref arm_compute::NELocallyConnectedMatrixMultiplyKernel / @ref arm_compute::NELocallyConnectedLayer + - @ref arm_compute::NEWeightsReshapeKernel / @ref arm_compute::NEConvolutionLayerReshapeWeights + +v17.05 Public bug fixes release + - Various bug fixes + - Remaining of the functions ported to use accurate padding. + - Library does not link against OpenCL anymore (It uses dlopen / dlsym at runtime instead to determine whether or not OpenCL is available). + - Added "free" method to allocator. 
+ - Minimum version of g++ required for armv7 Linux changed from 4.8 to 4.9 + +v17.04 Public bug fixes release + + The following functions have been ported to use the new accurate padding: + - @ref arm_compute::CLColorConvertKernel + - @ref arm_compute::CLEdgeNonMaxSuppressionKernel + - @ref arm_compute::CLEdgeTraceKernel + - @ref arm_compute::CLGaussianPyramidHorKernel + - @ref arm_compute::CLGaussianPyramidVertKernel + - @ref arm_compute::CLGradientKernel + - @ref arm_compute::NEChannelCombineKernel + - @ref arm_compute::NEFillArrayKernel + - @ref arm_compute::NEGaussianPyramidHorKernel + - @ref arm_compute::NEGaussianPyramidVertKernel + - @ref arm_compute::NEHarrisScoreFP16Kernel + - @ref arm_compute::NEHarrisScoreKernel + - @ref arm_compute::NEHOGDetectorKernel + - @ref arm_compute::NELogits1DMaxKernel + - @ref arm_compute::NELogits1DShiftExpSumKernel + - @ref arm_compute::NELogits1DNormKernel + - @ref arm_compute::NENonMaximaSuppression3x3FP16Kernel + - @ref arm_compute::NENonMaximaSuppression3x3Kernel + + +v17.03.1 First Major public release of the sources + - Renamed the library to arm_compute + - New CPP target introduced for C++ kernels shared between NEON and CL functions. + - New padding calculation interface introduced and ported most kernels / functions to use it. 
+ - New OpenCL kernels / functions: + - @ref arm_compute::CLGEMMLowpMatrixMultiplyKernel / @ref arm_compute::CLGEMMLowp + - New NEON kernels / functions: + - @ref arm_compute::NENormalizationLayerKernel / @ref arm_compute::NENormalizationLayer + - @ref arm_compute::NETransposeKernel / @ref arm_compute::NETranspose + - @ref arm_compute::NELogits1DMaxKernel, @ref arm_compute::NELogits1DShiftExpSumKernel, @ref arm_compute::NELogits1DNormKernel / @ref arm_compute::NESoftmaxLayer + - @ref arm_compute::NEIm2ColKernel, @ref arm_compute::NECol2ImKernel, arm_compute::NEConvolutionLayerWeightsReshapeKernel / @ref arm_compute::NEConvolutionLayer + - @ref arm_compute::NEGEMMMatrixAccumulateBiasesKernel / @ref arm_compute::NEFullyConnectedLayer + - @ref arm_compute::NEGEMMLowpMatrixMultiplyKernel / @ref arm_compute::NEGEMMLowp + +v17.03 Sources preview + - New OpenCL kernels / functions: + - @ref arm_compute::CLGradientKernel, @ref arm_compute::CLEdgeNonMaxSuppressionKernel, @ref arm_compute::CLEdgeTraceKernel / @ref arm_compute::CLCannyEdge + - GEMM refactoring + FP16 support: @ref arm_compute::CLGEMMInterleave4x4Kernel, @ref arm_compute::CLGEMMTranspose1xWKernel, @ref arm_compute::CLGEMMMatrixMultiplyKernel, @ref arm_compute::CLGEMMMatrixAdditionKernel / @ref arm_compute::CLGEMM + - @ref arm_compute::CLGEMMMatrixAccumulateBiasesKernel / @ref arm_compute::CLFullyConnectedLayer + - @ref arm_compute::CLTransposeKernel / @ref arm_compute::CLTranspose + - @ref arm_compute::CLLKTrackerInitKernel, @ref arm_compute::CLLKTrackerStage0Kernel, @ref arm_compute::CLLKTrackerStage1Kernel, @ref arm_compute::CLLKTrackerFinalizeKernel / @ref arm_compute::CLOpticalFlow + - @ref arm_compute::CLNormalizationLayerKernel / @ref arm_compute::CLNormalizationLayer + - @ref arm_compute::CLLaplacianPyramid, @ref arm_compute::CLLaplacianReconstruct + - New NEON kernels / functions: + - @ref arm_compute::NEActivationLayerKernel / @ref arm_compute::NEActivationLayer + - GEMM refactoring + FP16 support 
(Requires armv8.2 CPU): @ref arm_compute::NEGEMMInterleave4x4Kernel, @ref arm_compute::NEGEMMTranspose1xWKernel, @ref arm_compute::NEGEMMMatrixMultiplyKernel, @ref arm_compute::NEGEMMMatrixAdditionKernel / @ref arm_compute::NEGEMM + - @ref arm_compute::NEPoolingLayerKernel / @ref arm_compute::NEPoolingLayer + +v17.02.1 Sources preview + - New OpenCL kernels / functions: + - @ref arm_compute::CLLogits1DMaxKernel, @ref arm_compute::CLLogits1DShiftExpSumKernel, @ref arm_compute::CLLogits1DNormKernel / @ref arm_compute::CLSoftmaxLayer + - @ref arm_compute::CLPoolingLayerKernel / @ref arm_compute::CLPoolingLayer + - @ref arm_compute::CLIm2ColKernel, @ref arm_compute::CLCol2ImKernel, @ref arm_compute::CLConvolutionLayerWeightsReshapeKernel / @ref arm_compute::CLConvolutionLayer + - @ref arm_compute::CLRemapKernel / @ref arm_compute::CLRemap + - @ref arm_compute::CLGaussianPyramidHorKernel, @ref arm_compute::CLGaussianPyramidVertKernel / @ref arm_compute::CLGaussianPyramid, @ref arm_compute::CLGaussianPyramidHalf, @ref arm_compute::CLGaussianPyramidOrb + - @ref arm_compute::CLMinMaxKernel, @ref arm_compute::CLMinMaxLocationKernel / @ref arm_compute::CLMinMaxLocation + - @ref arm_compute::CLNonLinearFilterKernel / @ref arm_compute::CLNonLinearFilter + - New NEON FP16 kernels (Requires armv8.2 CPU) + - @ref arm_compute::NEAccumulateWeightedFP16Kernel + - @ref arm_compute::NEBox3x3FP16Kernel + - @ref arm_compute::NENonMaximaSuppression3x3FP16Kernel + +v17.02 Sources preview + - New OpenCL kernels / functions: + - @ref arm_compute::CLActivationLayerKernel / @ref arm_compute::CLActivationLayer + - @ref arm_compute::CLChannelCombineKernel / @ref arm_compute::CLChannelCombine + - @ref arm_compute::CLDerivativeKernel / @ref arm_compute::CLChannelExtract + - @ref arm_compute::CLFastCornersKernel / @ref arm_compute::CLFastCorners + - @ref arm_compute::CLMeanStdDevKernel / @ref arm_compute::CLMeanStdDev + - New NEON kernels / functions: + - HOG / SVM: @ref 
arm_compute::NEHOGOrientationBinningKernel, @ref arm_compute::NEHOGBlockNormalizationKernel, @ref arm_compute::NEHOGDetectorKernel, arm_compute::NEHOGNonMaximaSuppressionKernel / @ref arm_compute::NEHOGDescriptor, @ref arm_compute::NEHOGDetector, @ref arm_compute::NEHOGGradient, @ref arm_compute::NEHOGMultiDetection + - @ref arm_compute::NENonLinearFilterKernel / @ref arm_compute::NENonLinearFilter + - Introduced a CLScheduler to manage the default context and command queue used by the runtime library and create synchronisation events. + - Switched all the kernels / functions to use tensors instead of images. + - Updated documentation to include instructions to build the library from sources. + +v16.12 Binary preview release + - Original release + +@section S3_how_to_build How to build the library and the examples + +@subsection S3_1_build_options Build options + +scons 2.3 or above is required to build the library. +To see the build options available simply run ```scons -h```: + + debug: Debug (yes|no) + default: False + actual: False + + asserts: Enable asserts (this flag is forced to 1 for debug=1) (yes|no) + default: False + actual: False + + arch: Target Architecture (armv7a|arm64-v8a|arm64-v8.2-a|x86_32|x86_64) + default: armv7a + actual: armv7a + + os: Target OS (linux|android|bare_metal) + default: linux + actual: linux + + build: Build type (native|cross_compile) + default: cross_compile + actual: cross_compile + + examples: Build example programs (yes|no) + default: True + actual: True + + Werror: Enable/disable the -Werror compilation flag (yes|no) + default: True + actual: True + + opencl: Enable OpenCL support (yes|no) + default: True + actual: True + + neon: Enable Neon support (yes|no) + default: False + actual: False + + embed_kernels: Embed OpenCL kernels in library binary (yes|no) + default: False + actual: False + + set_soname: Set the library's soname and shlibversion (requires SCons 2.4 or above) (yes|no) + default: False + actual: False + + 
openmp: Enable OpenMP backend (yes|no) + default: False + actual: False + + cppthreads: Enable C++11 threads backend (yes|no) + default: True + actual: True + + build_dir: Specify sub-folder for the build ( /path/to/build_dir ) + default: . + actual: . + + extra_cxx_flags: Extra CXX flags to be appended to the build command + default: + actual: + + pmu: Enable PMU counters (yes|no) + default: False + actual: False + + validation_tests: Build validation test programs (yes|no) + default: False + actual: False + + benchmark_tests: Build benchmark test programs (yes|no) + default: False + actual: False + +@b debug / @b asserts: + - With debug=1 asserts are enabled, and the library is built with symbols and no optimisations enabled. + - With debug=0 and asserts=1: Optimisations are enabled and symbols are removed, however all the asserts are still present (This is about 20% slower than the release build) + - With debug=0 and asserts=0: All optimisations are enabled and no validation is performed, if the application misuses the library it is likely to result in a crash. (Only use this mode once you are sure your application is working as expected). + +@b arch: The x86_32 and x86_64 targets can only be used with neon=0 and opencl=1. + +@b os: Choose the operating system you are targeting: Linux, Android or bare metal. +@note bare metal can only be used for NEON (not OpenCL), only static libraries get built and NEON's multi-threading support is disabled. + +@b build: you can either build directly on your device (native) or cross compile from your desktop machine (cross-compile). In both cases make sure the compiler is available in your path. + +@note If you want to natively compile for 32bit on a 64bit ARM device running a 64bit OS then you will have to use cross-compile too. + +@b Werror: If you are compiling using the same toolchains as the ones used in this guide then there shouldn't be any warning and therefore you should be able to keep Werror=1. 
If with a different compiler version the library fails to build because of warnings interpreted as errors then, if you are sure the warnings are not important, you might want to try to build with Werror=0 (But please do report the issue either on Github or by an email to developer@arm.com so that the issue can be addressed). + +@b opencl / @b neon: Choose which SIMD technology you want to target. (NEON for ARM Cortex-A CPUs or OpenCL for ARM Mali GPUs) + +@b embed_kernels: For OpenCL only: set embed_kernels=1 if you want the OpenCL kernels to be built in the library's binaries instead of being read from separate ".cl" files. If embed_kernels is set to 0 then the application can set the path to the folder containing the OpenCL kernel files by calling CLKernelLibrary::init(). By default the path is set to "./cl_kernels". + +@b set_soname: Do you want to build the versioned version of the library ? + +If enabled the library will contain a SONAME and SHLIBVERSION and some symlinks will automatically be created between the objects. +Example: + libarm_compute_core.so -> libarm_compute_core.so.1.0.0 + libarm_compute_core.so.1 -> libarm_compute_core.so.1.0.0 + libarm_compute_core.so.1.0.0 + +@note This options is disabled by default as it requires SCons version 2.4 or above. + +@b extra_cxx_flags: Custom CXX flags which will be appended to the end of the build command. + +@b build_dir: Build the library in a subfolder of the "build" folder. (Allows to build several configurations in parallel). + +@b examples: Build or not the examples + +@b validation_tests: Enable the build of the validation suite. + +@note You will need the Boost Test and Program options headers and libraries to build the validation tests. See @ref building_boost for more information. + +@b benchmark_tests: Enable the build of the benchmark tests + +@b pmu: Enable the PMU cycle counter to measure execution time in benchmark tests. 
(Your device needs to support it) + +@note You will need the Boost Program options and Google Benchmark headers and libraries to build the benchmark tests. See @ref building_google_benchmark for more information. + +@b openmp Build in the OpenMP scheduler for NEON. + +@note Only works when building with g++ not clang++ + +@b cppthreads Build in the C++11 scheduler for NEON. + +@sa arm_compute::Scheduler::set + +@subsection S3_2_linux Linux + +@subsubsection S3_2_1_library How to build the library ? + +For Linux, the library was successfully built and tested using the following Linaro GCC toolchain: + + - gcc-linaro-arm-linux-gnueabihf-4.9-2014.07_linux + - gcc-linaro-4.9-2016.02-x86_64_aarch64-linux-gnu + - gcc-linaro-6.3.1-2017.02-i686_aarch64-linux-gnu + +@note If you are building with opencl=1 then scons will expect to find libOpenCL.so either in the current directory or in "build" (See the section below if you need a stub OpenCL library to link against) + +To cross-compile the library in debug mode, with NEON only support, for Linux 32bit: + + scons Werror=1 -j8 debug=1 neon=1 opencl=0 os=linux arch=armv7a + +To cross-compile the library in asserts mode, with OpenCL only support, for Linux 64bit: + + scons Werror=1 -j8 debug=0 asserts=1 neon=0 opencl=1 embed_kernels=1 os=linux arch=arm64-v8a + +You can also compile the library natively on an ARM device by using build=native: + + scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=arm64-v8a build=native + scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a build=native + +@note g++ for ARM is mono-arch, therefore if you want to compile for Linux 32bit on a Linux 64bit platform you will have to use a cross compiler. 
+ +For example on a 64bit Debian based system you would have to install g++-arm-linux-gnueabihf + + apt-get install g++-arm-linux-gnueabihf + +Then run + + scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a build=cross_compile + +or simply remove the build parameter as build=cross_compile is the default value: + + scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a + +@attention To cross compile with opencl=1 you need to make sure to have a version of libOpenCL matching your target architecture. + +@subsubsection S3_2_2_examples How to manually build the examples ? + +The examples get automatically built by scons as part of the build process of the library described above. This section just describes how you can build and link your own application against our library. + +@note The following command lines assume the arm_compute and libOpenCL binaries are present in the current directory or in the system library path. If this is not the case you can specify the location of the pre-built library with the compiler option -L. When building the OpenCL example the commands below assume that the CL headers are located in the include folder where the command is executed. + +To cross compile a NEON example for Linux 32bit: + + arm-linux-gnueabihf-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -mfpu=neon -L. -larm_compute -o neon_convolution + +To cross compile a NEON example for Linux 64bit: + + aarch64-linux-gnu-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -L. -larm_compute -o neon_convolution + +(notice the only difference with the 32 bit command is that we don't need the -mfpu option and the compiler's name is different) + +To cross compile an OpenCL example for Linux 32bit: + + arm-linux-gnueabihf-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -mfpu=neon -L. 
-larm_compute -lOpenCL -o cl_convolution + +To cross compile an OpenCL example for Linux 64bit: + + aarch64-linux-gnu-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -L. -larm_compute -lOpenCL -o cl_convolution + +(notice the only difference with the 32 bit command is that we don't need the -mfpu option and the compiler's name is different) + +To compile natively (i.e directly on an ARM device) for NEON for Linux 32bit: + + g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -mfpu=neon -larm_compute -o neon_convolution + +To compile natively (i.e directly on an ARM device) for NEON for Linux 64bit: + + g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -larm_compute -o neon_convolution + +(notice the only difference with the 32 bit command is that we don't need the -mfpu option) + +To compile natively (i.e directly on an ARM device) for OpenCL for Linux 32bit or Linux 64bit: + + g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute -lOpenCL -o cl_convolution + + +@note These two commands assume libarm_compute.so is available in your library path, if not add the path to it using -L + +To run the built executable simply run: + + LD_LIBRARY_PATH=build ./neon_convolution + +or + + LD_LIBRARY_PATH=build ./cl_convolution + +@note If you built the library with support for both OpenCL and NEON you will need to link against OpenCL even if your application only uses NEON. + +@subsection S3_3_android Android + +For Android, the library was successfully built and tested using Google's standalone toolchains: + - arm-linux-androideabi-4.9 for armv7a (clang++) + - aarch64-linux-android-4.9 for arm64-v8a (g++) + +Here is a guide to create your Android standalone toolchains from the NDK + +- Download the NDK r14 from here: https://developer.android.com/ndk/downloads/index.html +- Make sure you have Python 2 installed on your machine. 
+- Generate the 32 and/or 64 toolchains by running the following commands: + + + $NDK/build/tools/make_standalone_toolchain.py --arch arm64 --install-dir $MY_TOOLCHAINS/aarch64-linux-android-4.9 --stl gnustl + $NDK/build/tools/make_standalone_toolchain.py --arch arm --install-dir $MY_TOOLCHAINS/arm-linux-androideabi-4.9 --stl gnustl + +@attention Due to some NDK issues make sure you use g++ & gnustl for aarch64 and clang++ & gnustl for armv7 + +@note Make sure to add the toolchains to your PATH: export PATH=$PATH:$MY_TOOLCHAINS/aarch64-linux-android-4.9/bin:$MY_TOOLCHAINS/arm-linux-androideabi-4.9/bin + +@subsubsection S3_3_1_library How to build the library ? + +@note If you are building with opencl=1 then scons will expect to find libOpenCL.so either in the current directory or in "build" (See the section below if you need a stub OpenCL library to link against) + +To cross-compile the library in debug mode, with NEON only support, for Android 32bit: + + CXX=clang++ CC=clang scons Werror=1 -j8 debug=1 neon=1 opencl=0 os=android arch=armv7a + +To cross-compile the library in asserts mode, with OpenCL only support, for Android 64bit: + + scons Werror=1 -j8 debug=0 asserts=1 neon=0 opencl=1 embed_kernels=1 os=android arch=arm64-v8a + +@subsubsection S3_3_2_examples How to manually build the examples ? + +The examples get automatically built by scons as part of the build process of the library described above. This section just describes how you can build and link your own application against our library. + +@note The following command lines assume the arm_compute and libOpenCL binaries are present in the current directory or in the system library path. If this is not the case you can specify the location of the pre-built library with the compiler option -L. When building the OpenCL example the commands below assume that the CL headers are located in the include folder where the command is executed. 
+ +Once you've got your Android standalone toolchain built and added to your path you can do the following: + +To cross compile a NEON example: + + #32 bit: + arm-linux-androideabi-clang++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o neon_convolution_arm -static-libstdc++ -pie + #64 bit: + aarch64-linux-android-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o neon_convolution_aarch64 -static-libstdc++ -pie + +To cross compile an OpenCL example: + + #32 bit: + arm-linux-androideabi-clang++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o cl_convolution_arm -static-libstdc++ -pie -lOpenCL + #64 bit: + aarch64-linux-android-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o cl_convolution_aarch64 -static-libstdc++ -pie -lOpenCL + +@note Due to some issues in older versions of the Mali OpenCL DDK (<= r13p0), we recommend linking arm_compute statically on Android. + +Then all you need to do is upload the executable and the shared library to the device using ADB: + + adb push neon_convolution_arm /data/local/tmp/ + adb push cl_convolution_arm /data/local/tmp/ + adb shell chmod 777 -R /data/local/tmp/ + +And finally to run the example: + + adb shell /data/local/tmp/neon_convolution_arm + adb shell /data/local/tmp/cl_convolution_arm + +For 64bit: + + adb push neon_convolution_aarch64 /data/local/tmp/ + adb push cl_convolution_aarch64 /data/local/tmp/ + adb shell chmod 777 -R /data/local/tmp/ + +And finally to run the example: + + adb shell /data/local/tmp/neon_convolution_aarch64 + adb shell /data/local/tmp/cl_convolution_aarch64 + +@subsection S3_4_cl_stub_library The OpenCL stub library + +In the opencl-1.2-stubs folder you will find the sources to build a stub OpenCL library which can then be used to link your application or arm_compute against. 
+ +If you prefer, you can retrieve the OpenCL library from your device and link against this one but often this library will have dependencies on a range of system libraries forcing you to link your application against those too even though it is not using them. + +@warning This OpenCL library provided is a stub and *not* a real implementation. You can use it to resolve OpenCL's symbols in arm_compute while building the example but you must make sure the real libOpenCL.so is in your PATH when running the example or it will not work. + +To cross-compile the stub OpenCL library simply run: + + <target-prefix>-gcc -o libOpenCL.so -Iinclude opencl-1.2-stubs/opencl_stubs.c -fPIC -shared + +For example: + + <target-prefix>-gcc -o libOpenCL.so -Iinclude opencl-1.2-stubs/opencl_stubs.c -fPIC -shared + #Linux 32bit + arm-linux-gnueabihf-gcc -o libOpenCL.so -Iinclude opencl-1.2-stubs/opencl_stubs.c -fPIC -shared + #Linux 64bit + aarch64-linux-gnu-gcc -o libOpenCL.so -Iinclude -shared opencl-1.2-stubs/opencl_stubs.c -fPIC + #Android 32bit + arm-linux-androideabi-clang -o libOpenCL.so -Iinclude -shared opencl-1.2-stubs/opencl_stubs.c -fPIC + #Android 64bit + aarch64-linux-android-gcc -o libOpenCL.so -Iinclude -shared opencl-1.2-stubs/opencl_stubs.c -fPIC +*/ diff --git a/docs/01_library.dox b/docs/01_library.dox new file mode 100644 index 0000000..738579e --- /dev/null +++ b/docs/01_library.dox @@ -0,0 +1,250 @@ +namespace arm_compute +{ +/** +@page architecture Library architecture + +@tableofcontents + +@section S4_1 Core vs Runtime libraries + +The Core library is a low level collection of algorithms implementations, it is designed to be embedded in existing projects and applications: + +- It doesn't allocate any memory (All the memory allocations/mappings have to be handled by the caller). +- It doesn't perform any kind of multi-threading (but provides information to the caller about how the workload can be split). 
+ +The Runtime library is a very basic wrapper around the Core library which can be used for quick prototyping, it is basic in the sense that: + +- It allocates images and tensors by using standard malloc(). +- It multi-threads NEON code in a very basic way using a very simple pool of threads. +- For OpenCL it uses the default CLScheduler command queue for all mapping operations and kernels. + +For maximum performance, it is expected that the users would re-implement an equivalent to the runtime library which suits better their needs (With a more clever multi-threading strategy, load-balancing between NEON and OpenCL, etc.) + +@section S4_2_windows_kernels_mt_functions Windows, kernels, multi-threading and functions + +@subsection S4_2_1_windows Windows + +A @ref Window represents a workload to execute, it can handle up to @ref Coordinates::num_max_dimensions dimensions. +Each dimension is defined by a start, end and step. + +It can split into subwindows as long as *all* the following rules remain true for all the dimensions: + +- max[n].start() <= sub[n].start() < max[n].end() +- sub[n].start() < sub[n].end() <= max[n].end() +- max[n].step() == sub[n].step() +- (sub[n].start() - max[n].start()) % max[n].step() == 0 +- (sub[n].end() - sub[n].start()) % max[n].step() == 0 + +@subsection S4_2_2 Kernels + +Each implementation of the @ref IKernel interface (base class of all the kernels in the core library) works in the same way: + +OpenCL kernels: + +@code{.cpp} +// Initialize the CLScheduler with the default context and default command queue +// Implicitly initializes the CLKernelLibrary to use ./cl_kernels as location for OpenCL kernels files and sets a default device for which OpenCL programs are built. 
+CLScheduler::get().default_init(); + +cl::CommandQueue q = CLScheduler::get().queue(); +//Create a kernel object: +MyKernel kernel; +// Initialize the kernel with the input/output and options you want to use: +kernel.configure( input, output, option0, option1); +// Retrieve the execution window of the kernel: +const Window& max_window = kernel.window(); +// Run the whole kernel in the current thread: +kernel.run( q, max_window ); // Enqueue the kernel to process the full window on the default queue + +// Wait for the processing to complete: +q.finish(); +@endcode + +NEON / CPP kernels: + +@code{.cpp} +//Create a kernel object: +MyKernel kernel; +// Initialize the kernel with the input/output and options you want to use: +kernel.configure( input, output, option0, option1); +// Retrieve the execution window of the kernel: +const Window& max_window = kernel.window(); +// Run the whole kernel in the current thread: +kernel.run( max_window ); // Run the kernel on the full window +@endcode + +@subsection S4_2_3 Multi-threading + +The previous section shows how to run a NEON / CPP kernel in the current thread, however if your system has several CPU cores, you will probably want the kernel to use several cores. Here is how this can be done: + +@snippet src/runtime/CPP/CPPScheduler.cpp Scheduler example + +This is the very basic implementation used in the NEON runtime library by all the NEON functions. + +@sa CPPScheduler. + +@note Some kernels like for example @ref NEHistogramKernel need some local temporary buffer to perform their calculations. In order to avoid memory corruption between threads, the local buffer must be of size: ```memory_needed_per_thread * num_threads``` and each subwindow must be initialized by calling @ref Window::set_thread_id() with a unique thread_id between 0 and num_threads. 
+ +@subsection S4_2_4 Functions + +Functions will automatically allocate the temporary buffers mentioned above, and will automatically multi-thread kernels' executions using the very basic scheduler described in the previous section. + +Simple functions only call a single kernel (e.g @ref NEConvolution3x3), while more complex ones consist of several kernels pipelined together (e.g @ref NEGaussianPyramid, @ref NEHarrisCorners). Check their documentation to find out which kernels are used by each function. + +@code{.cpp} +//Create a function object: +MyFunction function; +// Initialize the function with the input/output and options you want to use: +function.configure( input, output, option0, option1); +// Execute the function: +function.run(); +@endcode + +@warning The Compute Library requires Mali OpenCL DDK r8p0 or higher (OpenCL kernels are compiled using the -cl-arm-non-uniform-work-group-size flag) + +@note All OpenCL functions and objects in the runtime library use the command queue associated with CLScheduler for all operations, a real implementation would be expected to use different queues for mapping operations and kernels in order to reach a better GPU utilization. + +@subsection S4_4_1_cl_scheduler OpenCL Scheduler and kernel library + +The Compute Library runtime uses a single command queue and context for all the operations. + +The user can get / set this context and command queue through CLScheduler's interface. + +The user can get / set the target GPU device through the CLScheduler's interface. + +@attention Make sure the application is using the same context as the library as in OpenCL it is forbidden to share objects across contexts. This is done by calling @ref CLScheduler::init() or @ref CLScheduler::default_init() at the beginning of your application. + +@attention Make sure the scheduler's target is not changed after function classes are created. + +All OpenCL kernels used by the library are built and stored in @ref CLKernelLibrary. 
+If the library is compiled with embed_kernels=0 the application can set the path to the OpenCL kernels by calling @ref CLKernelLibrary::init(), by default the path is set to "./cl_kernels" + +@subsection S4_4_2_events_sync OpenCL events and synchronization + +In order to block until all the jobs in the CLScheduler's command queue are done executing the user can call @ref CLScheduler::sync() or create a sync event using @ref CLScheduler::enqueue_sync_event() + +For example: +@snippet cl_events.cpp OpenCL events + +@subsection S4_4_2_cl_neon OpenCL / NEON interoperability + +You can mix OpenCL and NEON kernels and functions. However it is the user's responsibility to handle the mapping/unmapping of OpenCL objects, for example: + +@snippet neoncl_scale_median_gaussian.cpp NEON / OpenCL Interop + +@sa main_neoncl_scale_median_gaussian + +@section S4_5_algorithms Algorithms + +All algorithms in this library have been implemented following the [OpenVX 1.1 specifications](https://www.khronos.org/registry/vx/specs/1.1/html/). Please refer to the Khronos documentation for more information. + +@section S4_6_images_tensors Images, padding, border modes and tensors + +Most kernels and functions in the library process images, however, in order to be future proof most of the kernels actually accept tensors. See below for more information about how they are related. + +@attention Each memory object can be written by only one kernel, however it can be read by several kernels. Writing to the same object from several kernels will result in undefined behavior. The kernel writing to an object must be configured before the kernel(s) reading from it. + +@subsection S4_6_1_padding_and_border Padding and border modes + +Several algorithms require a neighborhood around the current pixel to compute it's value. This means the algorithm will not be able to process the borders of the image unless you give it more information about how those border pixels should be processed. 
The @ref BorderMode enum is used for this purpose. + +You have 3 types of @ref BorderMode : + +- @ref BorderMode::UNDEFINED : Neighbor pixels outside of the image are treated as undefined. As a result all the pixels which are on the border will have a value which is undefined. +- @ref BorderMode::REPLICATE : Neighbor pixels outside of the image are treated as having the same value as the closest valid pixel. +- @ref BorderMode::CONSTANT : Neighbor pixels outside of the image are treated as having the same constant value. (The user can choose what this value should be). + +Moreover both OpenCL and NEON use vector loads and stores instructions to access the data in buffers, so in order to avoid having special cases to handle for the borders all the images and tensors used in this library must be padded. + +@subsubsection padding Padding + +There are different ways padding can be calculated: + +- Accurate padding: + +@snippet neon_convolution.cpp Accurate padding + +@note It's important to call allocate @b after the function is configured: if the image / tensor is already allocated then the function will shrink its execution window instead of increasing the padding. (See below for more details). + +- Manual padding / no padding / auto padding: You can allocate your images / tensors up front (before configuring your functions). In that case the function will use whatever padding is available and will shrink its execution window if there isn't enough padding available (which translates into a smaller valid region for the output). See also @ref valid_region). +If you don't want to manually set the padding but still want to allocate your objects upfront then you can use auto_padding. It guarantees that the allocation will have enough padding to run any of the provided functions. 
+ +@code{.cpp} +Image src, dst; + +// Use auto padding for the input: +src.info()->init_auto_padding(TensorShape(640u,480u), Format::U8); + +// Use manual padding for the destination image +dst.info()->init(src.info()->tensor_shape(), Format::U8, strides_in_bytes, offset_first_element_in_bytes, total_size_in_bytes); + +// Allocate all the images +src.allocator()->allocate(); +dst.allocator()->allocate(); +// Fill the input image with the content of the PPM image if a filename was provided: +fill_image(src); + +NEGaussian3x3 gauss; + +// Apply a Gaussian 3x3 filter to the source image (Note: if the padding provided is not enough then the execution window and valid region of the output will be shrunk) +gauss.configure(&src, &dst, BorderMode::UNDEFINED); + +//Execute the functions: +gauss.run(); +@endcode + +@warning Some kernels need up to 3 neighbor values to calculate the value of a given pixel. Therefore, to be safe, we use a 4-pixel padding all around the image. In addition, some kernels read and write up to 32 pixels at the same time. To cover that case as well we add an extra 32 pixels of padding at the end of each row. As a result auto padded buffers waste a lot of memory and are less cache friendly. It is therefore recommended to use accurate padding or manual padding wherever possible. + +@subsubsection valid_region Valid regions + +Some kernels (like edge detectors for example) need to read values of neighboring pixels to calculate the value of a given pixel, it is therefore not possible to calculate the values of the pixels on the edges. + +Another case is: if a kernel processes 8 pixels per iteration and the image's dimensions are not a multiple of 8 and not enough padding is available then the kernel will not be able to process the pixels near the right edge. As a result these pixels will be left undefined. + +In order to know which pixels have been calculated, each kernel sets a valid region for each output image or tensor. 
See also @ref TensorInfo::valid_region(), @ref ValidRegion + +@subsection S4_6_2_tensors Tensors + +Tensors are multi-dimensional arrays with a maximum of @ref Coordinates::num_max_dimensions dimensions. + +Depending on the number of dimensions tensors can be interpreted as various objects. A scalar can be represented as a zero-dimensional tensor and a vector of numbers can be represented as an one-dimensional tensor. Further, an image is actually just a 2D tensor, a 3D tensor can be seen as an array of images and a 4D tensor as a 2D array of images, etc. + +@note Most algorithms process images (i.e a 2D slice of the tensor), therefore only padding along the X and Y axes is required (2D slices can be stored contiguously in memory). + +@subsection S4_6_3_description_conventions Images and Tensors description conventions + +Image objects are defined by a @ref Format and dimensions expressed as [width, height, batch] + +Tensors are defined by a @ref DataType plus a number of channels (Always expected to be 1 for now) and their dimensions are expressed as [width, height, feature_maps, batch]. + +In other words, the lower three dimensions of a tensor specify a single input in [width, height, feature_maps], while any other specified dimension represents a batch in the appropriate dimension space. +For example, a tensor with dimensions [128, 128, 64, 16] represents a 1D batch space with 16 batches of 128 elements in width and height and 64 feature maps each. +Each kernel specifies the expected layout of each of its tensors in its documentation. + +@note Unless specified otherwise in the kernel's or function's documentation all tensors and images parameters passed must have identical dimensions. + +@note Unless specified otherwise in the kernel's or function's documentation the number of channels for tensors is expected to be 1 (For images, the number of channels is inferred from the @ref Format). 
+ +@attention Regardless of the @ref DataType used by a tensor the @ref ITensor::buffer() method will always return a uint8_t pointer, and all the metadata in @ref TensorInfo will be expressed in bytes. It is the user's responsibility to cast the pointer to the correct type. + +For example, to read the element located at the coordinates (x,y) of a float tensor: + +@code{.cpp} +float value = *reinterpret_cast<float *>(input.buffer() + input.info()->offset_element_in_bytes(Coordinates(x,y))); +@endcode + +@subsection S4_6_4_working_with_objects Working with Images and Tensors using iterators + +The library provides some iterators to access objects' data. +Iterators are created by associating a data object (An image or a tensor for example) with an iteration window. + +Iteration windows are defined by an array of dimensions, each of which consists of a start, end and step. + +The @ref execute_window_loop function takes an execution window, a lambda function and one or more iterators. +It will iterate through every element of the execution window and for each element it will update the iterators accordingly and call the lambda function. + +Here are a couple of examples of how to use the iterators to fill / read tensors: + +@snippet examples/neon_copy_objects.cpp Copy objects example +*/ +} // namespace arm_compute diff --git a/docs/02_tests.dox b/docs/02_tests.dox new file mode 100644 index 0000000..bf8838c --- /dev/null +++ b/docs/02_tests.dox @@ -0,0 +1,95 @@ +/** +@page tests Validation and benchmarks tests + +@tableofcontents + +@section building_test_dependencies Building dependencies + +The tests currently make use of Boost (Test and Program options) for validation +and Google Benchmark for performance runs. Below are instructions about how to +build these 3rd party libraries. 
+ +@note By default the build of the validation and benchmark tests is disabled, to enable it use `validation_tests=1` and `benchmark_tests=1` + +@subsection building_boost Building Boost + +First follow the instructions from the Boost library on how to set up the Boost +build system +(http://www.boost.org/doc/libs/1_64_0/more/getting_started/index.html). +Afterwards the required libraries can be built with: + + ./b2 --with-program_options --with-test link=static \ + define=BOOST_TEST_ALTERNATIVE_INIT_API + +Additionally, depending on your environment, it might be necessary to specify +the ```toolset=``` option to choose the right compiler. Moreover, +```address-model=32``` can be used to force building for 32bit and +```target-os=android``` must be specified to build for Android. + +After executing the build command the libraries +```libboost_program_options.a``` and ```libboost_unit_test_framework.a``` can +be found in ```./stage/lib```. + +@subsection building_google_benchmark Building Google Benchmark + +Instructions on how to build Google Benchmark using CMake can be found in their +repository: https://github.com/google/benchmark. For example, building for +Android 32bit can be achieved via + + cmake -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER=arm-linux-androideabi-clang++ \ + -DBENCHMARK_ENABLE_LTO=false -DBENCHMARK_ENABLE_TESTING=false .. + +The library required by the compute library is ```libbenchmark.a```. + +@section tests_running_tests Running tests +@subsection tests_running_tests_benchmarking Benchmarking +@subsubsection tests_running_tests_benchmarking_filter Filter tests +All tests can be run by invoking + + ./arm_compute_benchmark -- ./data + +where `./data` contains the assets needed by the tests. + +If only a subset of the tests has to be executed the `--benchmark_filter` option takes a regular expression to select matching tests. 
+ + ./arm_compute_benchmark --benchmark_filter=neon_bitwise_and ./data + +All available tests can be displayed with the `--benchmark_list_tests` switch. + + ./arm_compute_benchmark --benchmark_list_tests ./data + +@subsubsection tests_running_tests_benchmarking_runtime Runtime +By default every test is run multiple *iterations* until a minimum time is reached. The minimum time (in seconds) can be controlled with the `--benchmark_min_time` flag. However, each test might have a hard coded value for the number of iterations or minimum execution time. In that case the command line argument is ignored for those specific tests. +Additionally it is possible to specify multiple *repetitions* (`--benchmark_repetitions`) which will run each test multiple times (including the iterations). The average and standard deviation for all repetitions is automatically computed and reported. + +@subsubsection tests_running_tests_benchmarking_verbosity Verbosity +The verbosity of the test output can be controlled via the `--v` flag. Though it should hardly ever be necessary. + +@subsection tests_running_tests_validation Validation +@subsubsection tests_running_tests_validation_filter Filter tests +All tests can be run by invoking + + ./arm_compute_validation -- ./data + +where `./data` contains the assets needed by the tests. + +As running all tests can take a lot of time the suite is split into "precommit" and "nightly" tests. The precommit tests will be fast to execute but still cover the most important features. In contrast the nightly tests offer more extensive coverage but take longer. 
The different subsets can be selected from the command line as follows: + + ./arm_compute_validation -t @precommit -- ./data + ./arm_compute_validation -t @nightly -- ./data + +Additionally it is possible to select specific suites or tests: + + ./arm_compute_validation -t CL -- ./data + ./arm_compute_validation -t NEON/BitwiseAnd/RunSmall/_0 -- ./data + +All available tests can be displayed with the `--list_content` switch. + + ./arm_compute_validation --list_content -- ./data + +For a complete list of possible selectors please see: http://www.boost.org/doc/libs/1_64_0/libs/test/doc/html/boost_test/runtime_config/test_unit_filtering.html + +@subsubsection tests_running_tests_validation_verbosity Verbosity +There are two separate flags to control the verbosity of the test output. `--report_level` controls the verbosity of the summary produced after all tests have been executed. `--log_level` controls the verbosity of the information generated during the execution of tests. All available settings can be found in the Boost documentation for [--report_level](http://www.boost.org/doc/libs/1_64_0/libs/test/doc/html/boost_test/utf_reference/rt_param_reference/report_level.html) and [--log_level](http://www.boost.org/doc/libs/1_64_0/libs/test/doc/html/boost_test/utf_reference/rt_param_reference/log_level.html), respectively. +*/ diff --git a/docs/Doxyfile b/docs/Doxyfile index a305e5d..0876f3a 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -32,13 +32,13 @@ DOXYFILE_ENCODING = UTF-8 # title of most generated pages and in a few other places. # The default value is: My Project. -PROJECT_NAME = "ARM Compute Library" +PROJECT_NAME = "Compute Library" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. 
-PROJECT_NUMBER = 17.05 +PROJECT_NUMBER = v17.06 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -58,7 +58,7 @@ PROJECT_LOGO = # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. -OUTPUT_DIRECTORY = build/arm_compute/ +OUTPUT_DIRECTORY = build/docs/ # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and @@ -162,7 +162,7 @@ FULL_PATH_NAMES = YES # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. -STRIP_FROM_PATH = +#STRIP_FROM_PATH = arm_compute/ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which @@ -768,11 +768,14 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = ./docs/arm_compute.dox \ +INPUT = ./docs/00_introduction.dox \ + ./docs/01_library.dox \ + ./docs/02_tests.dox \ ./arm_compute/ \ ./src/core/CL/cl_kernels/ \ ./examples/ \ - ./test_helpers/ + ./tests/ \ + ./utils/ # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -882,9 +885,9 @@ EXCLUDE_SYMBOLS = # that contain example code fragments that are included (see the \include # command). -EXAMPLE_PATH = examples/ \ +EXAMPLE_PATH = ./examples/ \ . \ - + ./arm_compute/ # "." 
is Needed by the release script diff --git a/docs/arm_compute.dox b/docs/arm_compute.dox deleted file mode 100644 index 354bc3f..0000000 --- a/docs/arm_compute.dox +++ /dev/null @@ -1,718 +0,0 @@ -namespace arm_compute -{ -/** @mainpage Documentation - -@tableofcontents - -@section S0_introduction Introduction - -The ARM Computer Vision and Machine Learning library is a set of functions optimised for both ARM CPUs and GPUs using SIMD technologies. - -Several builds of the library are available using various configurations: - - OS: Linux, Android or bare metal. - - Architecture: armv7a (32bit) or arm64-v8a (64bit) - - Technology: NEON / OpenCL / NEON and OpenCL - - Debug / Asserts / Release: Use a build with asserts enabled to debug your application and enable extra validation. Once you are sure your application works as expected you can switch to a release build of the library for maximum performance. - -@subsection S0_1_contact Contact / Support - -Please email developer@arm.com - -In order to facilitate the work of the support team please provide the build information of the library you are using. To get the version of the library you are using simply run: - - $ strings android-armv7a-cl-asserts/libarm_compute.so | grep arm_compute_version - arm_compute_version=v16.12 Build options: {'embed_kernels': '1', 'opencl': '1', 'arch': 'armv7a', 'neon': '0', 'asserts': '1', 'debug': '0', 'os': 'android', 'Werror': '1'} Git hash=f51a545d4ea12a9059fe4e598a092f1fd06dc858 - -@section S1_file_organisation File organisation - -This archive contains: - - The arm_compute header and source files - - The latest Khronos OpenCL 1.2 C headers from the Khronos OpenCL registry - - The latest Khronos cl2.hpp from the Khronos OpenCL registry (API version 2.1 when this document was written) - - The sources for a stub version of libOpenCL.so to help you build your application. - - An examples folder containing a few examples to compile and link against the library. 
- - A @ref test_helpers folder containing headers with some boiler plate code used by the examples. - - This documentation. - -You should have the following file organisation: - - . - ├── arm_compute --> All the arm_compute headers - │   ├── core - │   │   ├── CL - │   │   │   ├── CLKernels.h --> Includes all the OpenCL kernels at once - │   │   │   ├── CL specialisation of all the generic objects interfaces (ICLTensor, ICLImage, etc.) - │   │   │   ├── kernels --> Folder containing all the OpenCL kernels - │   │   │   │   └── CL*Kernel.h - │   │   │   └── OpenCL.h --> Wrapper to configure the Khronos OpenCL C++ header - │   │ ├── CPP - │   │ │   └── kernels --> Folder containing all the CPP kernels - │   │   │   │   └── CPP*Kernel.h - │   │   ├── NEON - │   │   │   ├── kernels --> Folder containing all the NEON kernels - │   │   │   │   └── NE*Kernel.h - │   │   │   └── NEKernels.h --> Includes all the NEON kernels at once - │   │   ├── All common basic types (Types.h, Window, Coordinates, Iterator, etc.) - │   │   ├── All generic objects interfaces (ITensor, IImage, etc.) - │   │   └── Objects metadata classes (ImageInfo, TensorInfo, MultiImageInfo) - │   └── runtime - │   ├── CL - │   │   ├── CL objects & allocators (CLArray, CLImage, CLTensor, etc.) - │   │   ├── functions --> Folder containing all the OpenCL functions - │   │   │   └── CL*.h - │   │   └── CLFunctions.h --> Includes all the OpenCL functions at once - │   ├── CPP - │   │   └── CPPScheduler.h --> Basic pool of threads to execute CPP/NEON code on several cores in parallel - │   ├── NEON - │   │ ├── functions --> Folder containing all the NEON functions - │   │ │   └── NE*.h - │   │ └── NEFunctions.h --> Includes all the NEON functions at once - │   └── Basic implementations of the generic object interfaces (Array, Image, Tensor, etc.) - ├── documentation - │   ├── index.xhtml - │   └── ... 
- ├── documentation.xhtml -> documentation/index.xhtml - ├── examples - │   ├── cl_convolution.cpp - │   ├── neoncl_scale_median_gaussian.cpp - │   ├── neon_convolution.cpp - │   └── neon_scale.cpp - ├── include - │   └── CL - │   └── Khronos OpenCL C headers and C++ wrapper - ├── opencl-1.2-stubs - │ └── opencl_stubs.c - ├── src - │   ├── core - │ │ └── ... (Same structure as headers) - │   │ └── CL - │   │ └── cl_kernels --> All the OpenCL kernels - │ └── runtime - │ └── ... (Same structure as headers) - └── test_helpers --> Boiler plate code used by examples - └── Utils.h - -@section S2_versions_changelog Release versions and changelog - -@subsection S2_1_versions Release versions - -All releases are numbered vYY.MM Where YY are the last two digits of the year, and MM the month number. -If there is more than one release in a month then an extra sequential number is appended at the end: - - v17.03 (First release of March 2017) - v17.03.1 (Second release of March 2017) - v17.04 (First release of April 2017) - -@note We're aiming at releasing one major public release with new features per quarter. All releases in between will only contain bug fixes. - -@subsection S2_2_changelog Changelog - -v17.05 Public bug fixes release - - Various bug fixes - - Remaining of the functions ported to use accurate padding. - - Library does not link against OpenCL anymore (It uses dlopen / dlsym at runtime instead to determine whether or not OpenCL is available). - - Added "free" method to allocator. 
- - Minimum version of G++ required for armv7 Linux changed from 4.8 to 4.9 - -v17.04 Public bug fixes release - The following functions have been ported to use the new accurate padding: - - @ref CLColorConvertKernel - - @ref CLEdgeNonMaxSuppressionKernel - - @ref CLEdgeTraceKernel - - @ref CLGaussianPyramidHorKernel - - @ref CLGaussianPyramidVertKernel - - @ref CLGradientKernel - - @ref NEChannelCombineKernel - - @ref NEFillArrayKernel - - @ref NEGaussianPyramidHorKernel - - @ref NEGaussianPyramidVertKernel - - @ref NEHarrisScoreFP16Kernel - - @ref NEHarrisScoreKernel - - @ref NEHOGDetectorKernel - - @ref NELogits1DMaxKernel - - @ref NELogits1DShiftExpSumKernel - - @ref NELogits1DNormKernel - - @ref NENonMaximaSuppression3x3FP16Kernel - - @ref NENonMaximaSuppression3x3Kernel - - -v17.03.1 First Major public release of the sources - - Renamed the library to arm_compute - - New CPP target introduced for C++ kernels shared between NEON and CL functions. - - New padding calculation interface introduced and ported most kernels / functions to use it. 
- - New OpenCL kernels / functions: - - @ref CLGEMMLowpMatrixMultiplyKernel / @ref CLGEMMLowp - - New NEON kernels / functions: - - @ref NENormalizationLayerKernel / @ref NENormalizationLayer - - @ref NETransposeKernel / @ref NETranspose - - @ref NELogits1DMaxKernel, @ref NELogits1DShiftExpSumKernel, @ref NELogits1DNormKernel / @ref NESoftmaxLayer - - @ref NEIm2ColKernel @ref NECol2ImKernel @ref NEConvolutionLayerWeightsReshapeKernel / @ref NEConvolutionLayer - - @ref NEGEMMMatrixAccumulateBiasesKernel / @ref NEFullyConnectedLayer - - @ref NEGEMMLowpMatrixMultiplyKernel / @ref NEGEMMLowp - -v17.03 Sources preview - - New OpenCL kernels / functions: - - @ref CLGradientKernel, @ref CLEdgeNonMaxSuppressionKernel, @ref CLEdgeTraceKernel / @ref CLCannyEdge - - GEMM refactoring + FP16 support: @ref CLGEMMInterleave4x4Kernel, @ref CLGEMMTranspose1xWKernel, @ref CLGEMMMatrixMultiplyKernel, @ref CLGEMMMatrixAdditionKernel / @ref CLGEMM - - @ref CLGEMMMatrixAccumulateBiasesKernel / @ref CLFullyConnectedLayer - - @ref CLTransposeKernel / @ref CLTranspose - - @ref CLLKTrackerInitKernel, @ref CLLKTrackerStage0Kernel, @ref CLLKTrackerStage1Kernel, @ref CLLKTrackerFinalizeKernel / @ref CLOpticalFlow - - @ref CLNormalizationLayerKernel / @ref CLNormalizationLayer - - @ref CLLaplacianPyramid, @ref CLLaplacianReconstruct - - New NEON kernels / functions: - - @ref NEActivationLayerKernel / @ref NEActivationLayer - - GEMM refactoring + FP16 support (Requires armv8.2 CPU): @ref NEGEMMInterleave4x4Kernel, @ref NEGEMMTranspose1xWKernel, @ref NEGEMMMatrixMultiplyKernel, @ref NEGEMMMatrixAdditionKernel / @ref NEGEMM - - @ref NEPoolingLayerKernel / @ref NEPoolingLayer - -v17.02.1 Sources preview - - New OpenCL kernels / functions: - - @ref CLLogits1DMaxKernel, @ref CLLogits1DShiftExpSumKernel, @ref CLLogits1DNormKernel / @ref CLSoftmaxLayer - - @ref CLPoolingLayerKernel / @ref CLPoolingLayer - - @ref CLIm2ColKernel @ref CLCol2ImKernel @ref CLConvolutionLayerWeightsReshapeKernel / @ref 
CLConvolutionLayer - - @ref CLRemapKernel / @ref CLRemap - - @ref CLGaussianPyramidHorKernel, @ref CLGaussianPyramidVertKernel / @ref CLGaussianPyramid, @ref CLGaussianPyramidHalf, @ref CLGaussianPyramidOrb - - @ref CLMinMaxKernel, @ref CLMinMaxLocationKernel / @ref CLMinMaxLocation - - @ref CLNonLinearFilterKernel / @ref CLNonLinearFilter - - New NEON FP16 kernels (Requires armv8.2 CPU) - - @ref NEAccumulateWeightedFP16Kernel - - @ref NEBox3x3FP16Kernel - - @ref NENonMaximaSuppression3x3FP16Kernel - -v17.02 Sources preview - - New OpenCL kernels / functions: - - @ref CLActivationLayerKernel / @ref CLActivationLayer - - @ref CLChannelCombineKernel / @ref CLChannelCombine - - @ref CLDerivativeKernel / @ref CLChannelExtract - - @ref CLFastCornersKernel / @ref CLFastCorners - - @ref CLMeanStdDevKernel / @ref CLMeanStdDev - - New NEON kernels / functions: - - HOG / SVM: @ref NEHOGOrientationBinningKernel, @ref NEHOGBlockNormalizationKernel, @ref NEHOGDetectorKernel, @ref NEHOGNonMaximaSuppressionKernel / @ref NEHOGDescriptor, @ref NEHOGDetector, @ref NEHOGGradient, @ref NEHOGMultiDetection - - @ref NENonLinearFilterKernel / @ref NENonLinearFilter - - Introduced a CLScheduler to manage the default context and command queue used by the runtime library and create synchronisation events. - - Switched all the kernels / functions to use tensors instead of images. - - Updated documentation to include instructions to build the library from sources. - -v16.12 Binary preview release - - Original release - -@section S3_how_to_build How to build the library and the examples - -@subsection S3_1_build_options Build options - -scons 2.3 or above is required to build the library. 
-To see the build options available simply run ```scons -h```: - - debug: Debug (default=0) (0|1) - default: 0 - actual: 0 - - asserts: Enable asserts (This flag is forced to 1 for debug=1) (default=0) (0|1) - default: 0 - actual: 0 - - arch: Target Architecture (default=armv7a) (armv7a|arm64-v8a|arm64-v8.2-a|x86_32|x86_64) - default: armv7a - actual: armv7a - - os: Target OS (default=linux) (linux|android|bare_metal) - default: linux - actual: linux - - build: Build type: (default=cross_compile) (native|cross_compile) - default: cross_compile - actual: cross_compile - - Werror: Enable/disable the -Werror compilation flag (Default=1) (0|1) - default: 1 - actual: 1 - - opencl: Enable OpenCL support(Default=1) (0|1) - default: 1 - actual: 1 - - neon: Enable Neon support(Default=0) (0|1) - default: 0 - actual: 0 - - embed_kernels: Embed OpenCL kernels in library binary(Default=0) (0|1) - default: 0 - actual: 0 - - set_soname: Set the library's soname and shlibversion (Requires SCons 2.4 or above) (yes|no) - default: 0 - actual: False - - extra_cxx_flags: Extra CXX flags to be appended to the build command - default: - actual: - -Debug / asserts: - - With debug=1 asserts are enabled, and the library is built with symbols and no optimisations enabled. - - With debug=0 and asserts=1: Optimisations are enabled and symbols are removed, however all the asserts are still present (This is about 20% slower than the release build) - - With debug=0 and asserts=0: All optimisations are enable and no validation is performed, if the application misuses the library it is likely to result in a crash. (Only use this mode once you are sure your application is working as expected). - -Architecture: The x86_32 and x86_64 targets can only be used with neon=0 and opencl=1. - -OS: Choose the operating system you are targeting: Linux, Android or bare metal. 
-@note bare metal can only be used for NEON (not OpenCL), only static libraries get built and NEON's multi-threading support is disabled. - -Build type: you can either build directly on your device (native) or cross compile from your desktop machine (cross-compile). In both cases make sure the compiler is available in your path. - -Werror: If you are compiling using the same toolchains as the ones used in this guide then there shouldn't be any warning and therefore you should be able to keep Werror=1. If with a different compiler version the library fails to build because of warnings interpreted as errors then, if you are sure the warnings are not important, you might want to try to build with Werror=0 (But please do report the issue either on Github or by an email to developer@arm.com so that the issue can be addressed). - -OpenCL / NEON: Choose which SIMD technology you want to target. (NEON for ARM Cortex-A CPUs or OpenCL for ARM Mali GPUs) - -embed_kernels: For OpenCL only: set embed_kernels=1 if you want the OpenCL kernels to be built in the library's binaries instead of being read from separate ".cl" files. If embed_kernels is set to 0 then the application can set the path to the folder containing the OpenCL kernel files by calling CLKernelLibrary::init(). By default the path is set to "./cl_kernels". - -set_soname: Do you want to build the versioned version of the library ? -If enabled the library will contain a SONAME and SHLIBVERSION and some symlinks will automatically be created between the objects. -Example: - libarm_compute_core.so -> libarm_compute_core.so.1.0.0 - libarm_compute_core.so.1 -> libarm_compute_core.so.1.0.0 - libarm_compute_core.so.1.0.0 - -@note This options is disabled by default as it requires SCons version 2.4 or above. - -extra_cxx_flags: Custom CXX flags which will be appended to the end of the build command. - -@subsection S3_2_linux Linux - -@subsubsection S3_2_1_library How to build the library ? 
- -For Linux, the library was successfully built and tested using the following Linaro GCC toolchain: - - - gcc-linaro-arm-linux-gnueabihf-4.9-2014.07_linux - - gcc-linaro-4.9-2016.02-x86_64_aarch64-linux-gnu - - gcc-linaro-6.3.1-2017.02-i686_aarch64-linux-gnu - -@note If you are building with opencl=1 then scons will expect to find libOpenCL.so either in the current directory or in "build" (See the section below if you need a stub OpenCL library to link against) - -To cross-compile the library in debug mode, with NEON only support, for Linux 32bit: - - scons Werror=1 -j8 debug=1 neon=1 opencl=0 os=linux arch=armv7a - -To cross-compile the library in asserts mode, with OpenCL only support, for Linux 64bit: - - scons Werror=1 -j8 debug=0 asserts=1 neon=0 opencl=1 embed_kernels=1 os=linux arch=arm64-v8a - -You can also compile the library natively on an ARM device by using build=native: - - scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=arm64-v8a build=native - scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a build=native - -@note G++ for ARM is mono-arch, therefore if you want to compile for Linux 32bit on a Linux 64bit platform you will have to use a cross compiler. - -For example on a 64bit Debian based system you would have to install g++-arm-linux-gnueabihf - - apt-get install g++-arm-linux-gnueabihf - -Then run - - scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a build=cross_compile - -or simply remove the build parameter as build=cross_compile is the default value: - - scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a - -@attention To cross compile with opencl=1 you need to make sure to have a version of libOpenCL matching your target architecture. - -@subsubsection S3_2_2_examples How to manually build the examples ? - -The examples get automatically built by scons as part of the build process of the library described above. 
This section just describes how you can build and link your own application against our library. - -@note The following command lines assume the arm_compute and libOpenCL binaries are present in the current directory or in the system library path. If this is not the case you can specify the location of the pre-built library with the compiler option -L. When building the OpenCL example the commands below assume that the CL headers are located in the include folder where the command is executed. - -To cross compile a NEON example for Linux 32bit: - - arm-linux-gnueabihf-g++ examples/neon_convolution.cpp test_helpers/Utils.cpp -I. -std=c++11 -mfpu=neon -L. -larm_compute -o neon_convolution - -To cross compile a NEON example for Linux 64bit: - - aarch64-linux-gnu-g++ examples/neon_convolution.cpp test_helpers/Utils.cpp -I. -std=c++11 -L. -larm_compute -o neon_convolution - -(notice the only difference with the 32 bit command is that we don't need the -mfpu option and the compiler's name is different) - -To cross compile an OpenCL example for Linux 32bit: - - arm-linux-gnueabihf-g++ examples/cl_convolution.cpp test_helpers/Utils.cpp -I. -Iinclude -std=c++11 -mfpu=neon -L. -larm_compute -lOpenCL -o cl_convolution - -To cross compile an OpenCL example for Linux 64bit: - - aarch64-linux-gnu-g++ examples/cl_convolution.cpp test_helpers/Utils.cpp -I. -Iinclude -std=c++11 -L. -larm_compute -lOpenCL -o cl_convolution - -(notice the only difference with the 32 bit command is that we don't need the -mfpu option and the compiler's name is different) - -To compile natively (i.e directly on an ARM device) for NEON for Linux 32bit: - - g++ examples/neon_convolution.cpp test_helpers/Utils.cpp -I. -std=c++11 -mfpu=neon -larm_compute -o neon_convolution - -To compile natively (i.e directly on an ARM device) for NEON for Linux 64bit: - - g++ examples/neon_convolution.cpp test_helpers/Utils.cpp -I. 
-std=c++11 -larm_compute -o neon_convolution - -(notice the only difference with the 32 bit command is that we don't need the -mfpu option) - -To compile natively (i.e directly on an ARM device) for OpenCL for Linux 32bit or Linux 64bit: - - g++ examples/cl_convolution.cpp test_helpers/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute -lOpenCL -o cl_convolution - - -@note These two commands assume libarm_compute.so is available in your library path, if not add the path to it using -L - -To run the built executable simply run: - - LD_LIBRARY_PATH=build ./neon_convolution - -or - - LD_LIBRARY_PATH=build ./cl_convolution - -@note If you built the library with support for both OpenCL and NEON you will need to link against OpenCL even if your application only uses NEON. - -@subsection S3_3_android Android - -For Android, the library was successfully built and tested using Google's standalone toolchains: - - arm-linux-androideabi-4.9 for armv7a (clang++) - - aarch64-linux-android-4.9 for arm64-v8a (g++) - -Here is a guide to create your Android standalone toolchains from the NDK - -- Download the NDK r14 beta 2 from here: https://developer.android.com/ndk/downloads/index.html -- Make sure you have Python 2 installed on your machine. -- Generate the 32 and/or 64 toolchains by running the following commands: - - - $NDK/build/tools/make_standalone_toolchain.py --arch arm64 --install-dir $MY_TOOLCHAINS/aarch64-linux-android-4.9 --stl gnustl - $NDK/build/tools/make_standalone_toolchain.py --arch arm --install-dir $MY_TOOLCHAINS/arm-linux-androideabi-4.9 --stl gnustl - -@attention Due to some NDK issues make sure you use g++ & gnustl for aarch64 and clang++ & gnustl for armv7 - -@note Make sure to add the toolchains to your PATH: export PATH=$PATH:$MY_TOOLCHAINS/aarch64-linux-android-4.9/bin:$MY_TOOLCHAINS/arm-linux-androideabi-4.9/bin - -@subsubsection S3_3_1_library How to build the library ? 
- -@note If you are building with opencl=1 then scons will expect to find libOpenCL.so either in the current directory or in "build" (See the section below if you need a stub OpenCL library to link against) - -To cross-compile the library in debug mode, with NEON only support, for Android 32bit: - - CXX=clang++ CC=clang scons Werror=1 -j8 debug=1 neon=1 opencl=0 os=android arch=armv7a - -@attention Due to some NDK issues make sure you use g++ & gnustl for aarch64 and clang++ & libc++ for armv7 - -To cross-compile the library in asserts mode, with OpenCL only support, for Android 64bit: - - scons Werror=1 -j8 debug=0 asserts=1 neon=0 opencl=1 embed_kernels=1 os=android arch=arm64-v8a - -@subsubsection S3_3_2_examples How to manually build the examples ? - -The examples get automatically built by scons as part of the build process of the library described above. This section just describes how you can build and link your own application against our library. - -@note The following command lines assume the arm_compute binaries are present in the current directory or in the system library path. - -Once you've got your Android standalone toolchain built and added to your path you can do the following: - -To cross compile a NEON example: - - #32 bit: - arm-linux-androideabi-clang++ examples/neon_convolution.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o neon_convolution_arm -static-libstdc++ -pie - #64 bit: - aarch64-linux-android-g++ examples/neon_convolution.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o neon_convolution_aarch64 -static-libstdc++ -pie - -To cross compile an OpenCL example: - - #32 bit: - arm-linux-androideabi-clang++ examples/cl_convolution.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o cl_convolution_arm -static-libstdc++ -pie -lOpenCL - #64 bit: - aarch64-linux-android-g++ examples/cl_convolution.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. 
-o cl_convolution_aarch64 -static-libstdc++ -pie -lOpenCL - -@note Due to some issues in older versions of the Mali OpenCL DDK (<= r13p0), we recommend to link arm_compute statically on Android. - -Then you need to do is upload the executable and the shared library to the device using ADB: - - adb push neon_convolution_arm /data/local/tmp/ - adb push cl_convolution_arm /data/local/tmp/ - adb shell chmod 777 -R /data/local/tmp/ - -And finally to run the example: - - adb shell /data/local/tmp/neon_convolution_arm - adb shell /data/local/tmp/cl_convolution_arm - -For 64bit: - - adb push neon_convolution_aarch64 /data/local/tmp/ - adb push cl_convolution_aarch64 /data/local/tmp/ - adb shell chmod 777 -R /data/local/tmp/ - -And finally to run the example: - - adb shell /data/local/tmp/neon_convolution_aarch64 - adb shell /data/local/tmp/cl_convolution_aarch64 - -@subsection S3_4_cl_stub_library The OpenCL stub library - -In the opencl-1.2-stubs folder you will find the sources to build a stub OpenCL library which then can be used to link your application or arm_compute against. - -If you preferred you could retrieve the OpenCL library from your device and link against this one but often this library will have dependencies on a range of system libraries forcing you to link your application against those too even though it is not using them. - -@warning This OpenCL library provided is a stub and *not* a real implementation. You can use it to resolve OpenCL's symbols in arm_compute while building the example but you must make sure the real libOpenCL.so is in your PATH when running the example or it will not work. 
- -To cross-compile the stub OpenCL library simply run: - - -gcc -o libOpenCL.so -Iinclude opencl-1.2-stubs/opencl_stubs.c -fPIC -shared - -For example: - - -gcc -o libOpenCL.so -Iinclude opencl-1.2-stubs/opencl_stubs.c -fPIC -shared - #Linux 32bit - arm-linux-gnueabihf-gcc -o libOpenCL.so -Iinclude opencl-1.2-stubs/opencl_stubs.c -fPIC -shared - #Linux 64bit - aarch64-linux-gnu-gcc -o libOpenCL.so -Iinclude -shared opencl-1.2-stubs/opencl_stubs.c -fPIC - #Android 32bit - arm-linux-androideabi-clang -o libOpenCL.so -Iinclude -shared opencl-1.2-stubs/opencl_stubs.c -fPIC -shared - #Android 64bit - aarch64-linux-android-gcc -o libOpenCL.so -Iinclude -shared opencl-1.2-stubs/opencl_stubs.c -fPIC -shared - -@section S4_architecture Library Architecture - -@subsection S4_1 Core vs Runtime libraries - -The Core library is a low level collection of algorithms implementations, it is designed to be embedded in existing projects and applications: - -- It doesn't allocate any memory (All the memory allocations/mappings have to be handled by the caller). -- It doesn't perform any kind of multi-threading (but provides information to the caller about how the workload can be split). - -The Runtime library is a very basic wrapper around the Core library which can be used for quick prototyping, it is basic in the sense that: - -- It allocates images and tensors using standard malloc(). -- It multi-threads NEON code in a very basic way using a very simple pool of threads. -- For OpenCL it will use the default CLScheduler command queue for all mapping operations and kernels. - -For maximum performance, it is expected that the users would re-implement an equivalent to the runtime library which suits better their needs (With a more clever multi-threading strategy, load-balancing between NEON and OpenCL, etc.)
- -@subsection S4_2_windows_kernels_mt_functions Windows, kernels, multi-threading and functions - -@subsubsection S4_2_1_windows Windows - -A @ref Window represents a workload to execute, it's made of up to @ref Coordinates::num_max_dimensions dimensions. -Each dimension is defined by a start, end and step. - -It can be split into subwindows as long as *all* the following rules remain true for all the dimensions: - -- max[n].start() <= sub[n].start() < max[n].end() -- sub[n].start() < sub[n].end() <= max[n].end() -- max[n].step() == sub[n].step() -- (sub[n].start() - max[n].start()) % max[n].step() == 0 -- (sub[n].end() - sub[n].start()) % max[n].step() == 0 - -@subsubsection S4_2_2 Kernels - -Each implementation of the @ref IKernel interface (base class of all the kernels in the core library) works in the same way: - -OpenCL kernels: - -@code{.cpp} -// Initialise the CLScheduler with the default context and default command queue -// Also initialises the CLKernelLibrary to use ./cl_kernels as location for OpenCL kernels files and sets a default device for which OpenCL programs are built.
-CLScheduler::get().default_init(); - -cl::CommandQueue q = CLScheduler::get().queue(); -//Create a kernel object: -MyKernel kernel; -// Initialize the kernel with the input/output and options you want to use: -kernel.configure( input, output, option0, option1); -// Retrieve the execution window of the kernel: -const Window& max_window = kernel.window(); -// Run the whole kernel in the current thread: -kernel.run( q, max_window ); // Enqueue the kernel to process the full window on the default queue - -// Wait for the processing to complete: -q.finish(); -@endcode - -NEON / CPP kernels: - -@code{.cpp} -//Create a kernel object: -MyKernel kernel; -// Initialize the kernel with the input/output and options you want to use: -kernel.configure( input, output, option0, option1); -// Retrieve the execution window of the kernel: -const Window& max_window = kernel.window(); -// Run the whole kernel in the current thread: -kernel.run( max_window ); // Run the kernel on the full window -@endcode - -@subsubsection S4_2_3 Multi-threading - -The previous section shows how to run a NEON / CPP kernel in the current thread, however if your system has several CPU cores, you will probably want the kernel to use several cores. Here is how this can be done: - -@snippet src/runtime/CPP/CPPScheduler.cpp Scheduler example - -This is the very basic implementation used in the NEON runtime library by all the NEON functions. - -@sa CPPScheduler. - -@note Some kernels like for example @ref NEHistogramKernel need some local temporary buffer to perform their calculations. In order to avoid memory corruption between threads, the local buffer must be of size: ```memory_needed_per_thread * num_threads``` and each subwindow must be initialised by calling @ref Window::set_thread_id() with a unique thread_id between 0 and num_threads. 
- -@subsubsection S4_2_4 Functions - -Functions will automatically allocate the temporary buffers mentioned above, and will automatically multi-thread kernels' executions using the very basic scheduler described in the previous section. - -Simple functions are made of a single kernel (e.g @ref NEConvolution3x3), while more complex ones will be made of several kernels pipelined together (e.g @ref NEGaussianPyramid, @ref NEHarrisCorners), check their documentation to find out which kernels are used by each function. - -@code{.cpp} -//Create a function object: -MyFunction function; -// Initialize the function with the input/output and options you want to use: -function.configure( input, output, option0, option1); -// Execute the function: -function.run(); -@endcode - -@warning ARM Compute libraries require Mali OpenCL DDK r8p0 or above (OpenCL kernels are compiled using the -cl-arm-non-uniform-work-group-size flag) - -@note All OpenCL functions and objects in the runtime library use the command queue associated with CLScheduler for all operations, a real implementation would be expected to use different queues for mapping operations and kernels in order to reach a better GPU utilisation. - -@subsubsection S4_4_1_cl_scheduler OpenCL Scheduler and kernel library - -The ARM Compute runtime uses a single command queue and context for all the operations. - -The user can get / set this context and command queue through the CLScheduler's interface. - -@attention Make sure the application is using the same context as the library as in OpenCL it is forbidden to share objects across contexts. This is done by calling @ref CLScheduler::init() or @ref CLScheduler::default_init() at the beginning of your application. - -All the OpenCL kernels used by the library are built and stored in the @ref CLKernelLibrary.
-If the library is compiled with embed_kernels=0 the application can set the path to the OpenCL kernels by calling @ref CLKernelLibrary::init(), by default the path is set to "./cl_kernels" - -@subsubsection S4_4_2_events_sync OpenCL events and synchronisation - -In order to block until all the jobs in the CLScheduler's command queue are done executing the user can call @ref CLScheduler::sync() or create a sync event using @ref CLScheduler::enqueue_sync_event() - -For example: -@snippet cl_events.cpp OpenCL events - -@subsubsection S4_4_2_cl_neon OpenCL / NEON interoperability - -You can mix OpenCL and NEON kernels and/or functions, however it is the user's responsibility to handle the mapping/unmapping of the OpenCL objects, for example: - -@snippet neoncl_scale_median_gaussian.cpp NEON / OpenCL Interop - -@sa main_neoncl_scale_median_gaussian - -@subsection S4_5_algorithms Algorithms - -All algorithms in this library have been implemented following the [OpenVX 1.1 specifications](https://www.khronos.org/registry/vx/specs/1.1/html/) -Please refer to the Khronos documentation for more information. - -@subsection S4_6_images_tensors Images, padding, border modes and tensors - -Most kernels and functions in the library process images, however, in order to be future proof most of the kernels actually accept tensors, see below for more information about how they are related. - -@attention Each memory object can be written by only one kernel, however it can be read by several kernels. Writing to the same object from several kernels will result in undefined behaviour. The kernel writing to an object must be configured before the kernel(s) reading from it.
- -@subsubsection S4_6_1_padding_and_border Padding and border modes - -Several algorithms rely on neighbour pixels to calculate the value of a given pixel: this means the algorithm will not be able to process the borders of the image unless you give it more information about what you want to happen for border pixels, this is the @ref BorderMode. - -You have 3 types of @ref BorderMode : - -- @ref BorderMode::UNDEFINED : if you are missing pixel values then don't calculate the value. As a result all the pixels which are on the border will have a value which is undefined. -- @ref BorderMode::REPLICATE : if you are missing pixel values then assume the missing pixels have the same value as the closest valid pixel. -- @ref BorderMode::CONSTANT : if you are missing pixel values then assume the missing pixels all have the same constant value (The user can choose what this value should be). - -Moreover both OpenCL and NEON use vector loads and stores instructions to access the data in buffers, so in order to avoid having special cases to handle for the borders all the images and tensors used in this library must be padded. - -@paragraph padding Padding - -There are different ways padding can be calculated: - -- Accurate padding: - -@snippet neon_convolution.cpp Accurate padding - -@note It's important to call allocate @b after the function is configured: if the image / tensor is already allocated then the function will shrink its execution window instead of increasing the padding. (See below for more details). - -- Manual padding / no padding / auto padding: You can allocate your images / tensors up front (before configuring your functions), in that case the function will use whatever padding is available and will shrink its execution window if there isn't enough padding available (Which will translate into a smaller valid region for the output. See also @ref valid_region).
-If you don't want to manually set the padding but still want to allocate your objects upfront then you can use auto_padding. - -@code{.cpp} -Image src, dst; - -// Use auto padding for the input: -src.info()->init_auto_padding(TensorShape(640u,480u), Format::U8); - -// Use manual padding for the destination image -dst.info()->init(src.info()->tensor_shape(), Format::U8, strides_in_bytes, offset_first_element_in_bytes, total_size_in_bytes); - -// Allocate all the images -src.allocator()->allocate(); -dst.allocator()->allocate(); -// Fill the input image with the content of the PPM image if a filename was provided: -fill_image(src); - -NEGaussian3x3 gauss; - -// Apply a Gaussian 3x3 filter to the source image (Note: if the padding provided is not enough then the execution window and valid region of the output will be shrunk) -gauss.configure(&src, &dst, BorderMode::UNDEFINED); - -//Execute the functions: -gauss.run(); -@endcode - -@warning Some kernels need up to 3 neighbour values to calculate the value of a given pixel, therefore to be safe we use a 4 pixels padding all around the image and some kernels read and write up to 32 pixels at a time, therefore we add an extra 32 pixels of padding at the end of each row to be safe. As a result auto padded buffers waste a lot of memory and are less cache friendly. It is therefore recommended to use accurate padding or manual padding wherever possible. - -@paragraph valid_region Valid regions - -Some kernels (like edge detectors for example) need to read values of neighbouring pixels to calculate the value of a given pixel, it is therefore not possible to calculate the values of the pixels on the edges. - -Another case is: if a kernel processes 8 pixels per iteration then if the image's dimensions are not a multiple of 8 and not enough padding is available then the kernel will not be able to process the pixels near the right edge as a result these pixels will be left undefined.
- -In order to know which pixels have been calculated, each kernel sets a valid region for each output image or tensor. See also @ref TensorInfo::valid_region(), @ref ValidRegion - -@subsubsection S4_6_2_tensors Tensors - -Tensors are multi-dimensional arrays made of up to @ref Coordinates::num_max_dimensions dimensions. - -A simple vector of numbers can be represented as a 1D tensor, an image is actually just a 2D tensor, a 3D tensor can be seen as an array of images, a 4D tensor as a 2D array of images, etc. - -@note Most algorithms process images (i.e a 2D slice of the tensor), therefore only padding along the X and Y axes is required (2D slices can be stored contiguously in memory). - -@subsubsection S4_6_3_description_conventions Images and Tensors description conventions - -Image objects are defined by a @ref Format and dimensions expressed as [width, height, batch] - -Tensors are defined by a @ref DataType plus a number of channels (Always expected to be 1 for now) and their dimensions are expressed as [width, height, feature_maps, batch]. - -In other words, the lower three dimensions of a tensor specify a single input in [width, height, feature_maps], while any other specified dimension represents a batch in the appropriate dimension space. -For example, a tensor with dimensions [128, 128, 64, 16] represents a 1D batch space with 16 batches of 128 elements in width and height and 64 feature maps each. -Each kernel specifies the expected layout of each of its tensors in its documentation. - -@note Unless specified otherwise in the kernel's or function's documentation all tensors and images parameters passed must have identical dimensions. - -@note Unless specified otherwise in the kernel's or function's documentation the number of channels for tensors is expected to be 1 (For images, the number of channels is inferred from the @ref Format). 
- -@attention Regardless of the @ref DataType used by a tensor the @ref ITensor::buffer() method will always return a uint8_t pointer, and all the metadata in @ref TensorInfo will be expressed in bytes. It is the user's responsibility to cast the pointer to the correct type. - -For example, to read the element located at the coordinates (x,y) of a float tensor: - -@code{.cpp} -float value = *reinterpret_cast(input.buffer() + input.info()->offset_element_in_bytes(Coordinates(x,y))); -@endcode - -@subsubsection S4_6_4_working_with_objects Working with Images and Tensors using iterators - -The library provides some iterators to access objects' data. -Iterators are created by associating a data object (An image or a tensor for example) with an iteration window. - -Iteration windows are defined by an array of dimension, each of which is made of a start, end and step. - -The @ref execute_window_loop function takes an execution window, a lambda function and one or more iterators. -It will iterate through every element of the execution window and for each element it will update the iterators accordingly and call the lambda function. - -Here is a couple of examples of how to use the iterators to fill / read tensors: - -@snippet examples/neon_copy_objects.cpp Copy objects example - -*/ - } diff --git a/documentation/_utils_8cpp.xhtml b/documentation/00__introduction_8dox.xhtml similarity index 82% rename from documentation/_utils_8cpp.xhtml rename to documentation/00__introduction_8dox.xhtml index 48ca7a7..7b4bf01 100644 --- a/documentation/_utils_8cpp.xhtml +++ b/documentation/00__introduction_8dox.xhtml @@ -6,7 +6,7 @@ -ARM Compute Library: test_helpers/Utils.cpp File Reference +Compute Library: docs/00_introduction.dox File Reference @@ -39,8 +39,8 @@ -
ARM Compute Library -  17.05 +
Compute Library +  v17.06
@@ -55,6 +55,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
@@ -113,23 +114,16 @@ $(document).ready(function(){initNavTree('_utils_8cpp.xhtml','');});
-
Utils.cpp File Reference
+
docs/00_introduction.dox File Reference
-
#include "Utils.h"
-#include <cctype>
-#include <cerrno>
-#include <iomanip>
-#include <string>
-
-

Go to the source code of this file.

@@ -129,8 +130,8 @@ Namespaces
+ + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + + +
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
docs/02_tests.dox File Reference
+
+
+
+
+ + + + diff --git a/documentation/_absolute_difference_8cpp.xhtml b/documentation/_absolute_difference_8cpp.xhtml new file mode 100644 index 0000000..6f34d72 --- /dev/null +++ b/documentation/_absolute_difference_8cpp.xhtml @@ -0,0 +1,151 @@ + + + + + + + + +Compute Library: tests/validation/NEON/AbsoluteDifference.cpp File Reference + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
AbsoluteDifference.cpp File Reference
+
+
+
#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "boost_wrapper.h"
+#include <random>
+#include <string>
+
+

Go to the source code of this file.

+
+
+ + + + diff --git a/documentation/_absolute_difference_8cpp_source.xhtml b/documentation/_absolute_difference_8cpp_source.xhtml new file mode 100644 index 0000000..dacbbc3 --- /dev/null +++ b/documentation/_absolute_difference_8cpp_source.xhtml @@ -0,0 +1,177 @@ + + + + + + + + +Compute Library: tests/validation/NEON/AbsoluteDifference.cpp Source File + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
AbsoluteDifference.cpp
+
+
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "Globals.h"
25 #include "NEON/Helper.h"
26 #include "NEON/NEAccessor.h"
27 #include "TensorLibrary.h"
28 #include "TypePrinter.h"
29 #include "Utils.h"
30 #include "validation/Datasets.h"
31 #include "validation/Reference.h"
32 #include "validation/Validation.h"
33 
35 #include "arm_compute/core/Types.h"
39 
40 #include "boost_wrapper.h"
41 
42 #include <random>
43 #include <string>
44 
45 using namespace arm_compute;
46 using namespace arm_compute::test;
47 using namespace arm_compute::test::neon;
48 using namespace arm_compute::test::validation;
49 
50 namespace
51 {
61 Tensor compute_absolute_difference(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out)
62 {
63  // Create tensors
64  Tensor src1 = create_tensor(shape, dt_in0);
65  Tensor src2 = create_tensor(shape, dt_in1);
66  Tensor dst = create_tensor(shape, dt_out);
67 
68  // Create and configure function
70  abs_d.configure(&src1, &src2, &dst);
71 
72  // Allocate tensors
73  src1.allocator()->allocate();
74  src2.allocator()->allocate();
75  dst.allocator()->allocate();
76 
77  BOOST_TEST(!src1.info()->is_resizable());
78  BOOST_TEST(!src2.info()->is_resizable());
79  BOOST_TEST(!dst.info()->is_resizable());
80 
81  // Fill tensors
82  library->fill_tensor_uniform(NEAccessor(src1), 0);
83  library->fill_tensor_uniform(NEAccessor(src2), 1);
84 
85  // Compute function
86  abs_d.run();
87 
88  return dst;
89 }
90 
91 void validate_configuration(const Tensor &src1, const Tensor &src2, Tensor &dst, TensorShape shape)
92 {
93  BOOST_TEST(src1.info()->is_resizable());
94  BOOST_TEST(src2.info()->is_resizable());
95  BOOST_TEST(dst.info()->is_resizable());
96 
97  // Create and configure function
99  abs_d.configure(&src1, &src2, &dst);
100 
101  // Validate valid region
102  const ValidRegion valid_region = shape_to_valid_region(shape);
103  validate(src1.info()->valid_region(), valid_region);
104  validate(src2.info()->valid_region(), valid_region);
105  validate(dst.info()->valid_region(), valid_region);
106 
107  // Validate padding
108  const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0);
109  validate(src1.info()->padding(), padding);
110  validate(src2.info()->padding(), padding);
111  validate(dst.info()->padding(), padding);
112 }
113 } // namespace
114 
115 #ifndef DOXYGEN_SKIP_THIS
116 BOOST_AUTO_TEST_SUITE(NEON)
117 BOOST_AUTO_TEST_SUITE(AbsoluteDifference)
118 
119 BOOST_AUTO_TEST_SUITE(U8)
120 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
121 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()),
122  shape)
123 {
124  // Create tensors
125  Tensor src1 = create_tensor(shape, DataType::U8);
126  Tensor src2 = create_tensor(shape, DataType::U8);
127  Tensor dst = create_tensor(shape, DataType::U8);
128 
129  validate_configuration(src1, src2, dst, shape);
130 }
131 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
132 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(),
133  shape)
134 {
135  // Compute function
136  Tensor dst = compute_absolute_difference(shape, DataType::U8, DataType::U8, DataType::U8);
137 
138  // Compute reference
139  RawTensor ref_dst = Reference::compute_reference_absolute_difference(shape, DataType::U8, DataType::U8, DataType::U8);
140 
141  // Validate output
142  validate(NEAccessor(dst), ref_dst);
143 }
144 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
145 BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(),
146  shape)
147 {
148  // Compute function
149  Tensor dst = compute_absolute_difference(shape, DataType::U8, DataType::U8, DataType::U8);
150 
151  // Compute reference
152  RawTensor ref_dst = Reference::compute_reference_absolute_difference(shape, DataType::U8, DataType::U8, DataType::U8);
153 
154  // Validate output
155  validate(NEAccessor(dst), ref_dst);
156 }
157 BOOST_AUTO_TEST_SUITE_END()
158 
159 BOOST_AUTO_TEST_SUITE(S16)
160 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
161 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::U8, DataType::S16 }),
162  shape, dt)
163 {
164  // Create tensors
165  Tensor src1 = create_tensor(shape, dt);
166  Tensor src2 = create_tensor(shape, DataType::S16);
167  Tensor dst = create_tensor(shape, DataType::S16);
168 
169  validate_configuration(src1, src2, dst, shape);
170 }
171 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
172 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }),
173  shape, dt)
174 {
175  // Compute function
176  Tensor dst = compute_absolute_difference(shape, dt, DataType::S16, DataType::S16);
177 
178  // Compute reference
179  RawTensor ref_dst = Reference::compute_reference_absolute_difference(shape, dt, DataType::S16, DataType::S16);
180 
181  // Validate output
182  validate(NEAccessor(dst), ref_dst);
183 }
184 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
185 BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }),
186  shape, dt)
187 {
188  // Compute function
189  Tensor dst = compute_absolute_difference(shape, dt, DataType::S16, DataType::S16);
190 
191  // Compute reference
192  RawTensor ref_dst = Reference::compute_reference_absolute_difference(shape, dt, DataType::S16, DataType::S16);
193 
194  // Validate output
195  validate(NEAccessor(dst), ref_dst);
196 }
197 BOOST_AUTO_TEST_SUITE_END()
198 
199 BOOST_AUTO_TEST_SUITE_END()
200 BOOST_AUTO_TEST_SUITE_END()
201 #endif
Tensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels=1, int fixed_point_position=0)
Helper to create an empty tensor.
Definition: Helper.h:47
+ +
Simple tensor object that stores elements in a consecutive chunk of memory.
Definition: RawTensor.h:47
+
Shape of a tensor.
Definition: TensorShape.h:38
+ + +
Container for 2D border size.
Definition: Types.h:131
+
int required_padding(int size, int step)
Calculate the required padding given the available size and the required.
Definition: Utils.h:486
+ +
Unknown image format.
+ + + +
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
+
TensorAllocator * allocator()
Return a pointer to the tensor&#39;s allocator.
+
T x() const
Alias to access the size of the first dimension.
Definition: Dimensions.h:81
+
virtual bool is_resizable() const =0
Flag indicating whether the size of the tensor can be changed.
+ +
Data set containing small tensor shapes.
+ + +
ValidRegion shape_to_valid_region(TensorShape shape)
Create a valid region covering the enitre tensor shape.
Definition: Utils.h:452
+
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
+
Basic implementation of the tensor interface.
Definition: Tensor.h:37
+ +
virtual PaddingSize padding() const =0
Padding of tensor.
+
void configure(const ITensor *input1, const ITensor *input2, ITensor *output)
Set the inputs and output images.
+
1 channel, 1 U8 per channel
+
Data set containing large tensor shapes.
+ +
std::unique_ptr< TensorLibrary > library
Definition: main.cpp:50
+
Basic function to run NEAbsoluteDifferenceKernel.
+ +
void run() overridefinal
Run the kernels contained in the function.
+
Accessor implementation for Tensor objects.
Definition: NEAccessor.h:38
+
void validate(const arm_compute::ValidRegion &region, const arm_compute::ValidRegion &reference)
Validate valid regions.
Definition: Validation.cpp:166
+ +
Definition: Datasets.h:47
+ + + + + +
DataType
Available data types.
Definition: Types.h:60
+
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor&#39;s metadata.
+
+
+ + + + diff --git a/documentation/_access_window_auto_padding_8h.xhtml b/documentation/_access_window_auto_padding_8h.xhtml index 6aa125f..892ab64 100644 --- a/documentation/_access_window_auto_padding_8h.xhtml +++ b/documentation/_access_window_auto_padding_8h.xhtml @@ -6,7 +6,7 @@ -ARM Compute Library: arm_compute/core/AccessWindowAutoPadding.h File Reference +Compute Library: arm_compute/core/AccessWindowAutoPadding.h File Reference @@ -39,8 +39,8 @@ -
ARM Compute Library -  17.05 +
Compute Library +  v17.06
@@ -55,6 +55,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search'); + + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
Accumulate.cpp File Reference
+
+
+
#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "boost_wrapper.h"
+#include <random>
+#include <string>
+
+

Go to the source code of this file.

+
+
+ + + + diff --git a/documentation/_accumulate_8cpp_source.xhtml b/documentation/_accumulate_8cpp_source.xhtml new file mode 100644 index 0000000..f720223 --- /dev/null +++ b/documentation/_accumulate_8cpp_source.xhtml @@ -0,0 +1,175 @@ + + + + + + + + +Compute Library: tests/validation/NEON/Accumulate.cpp Source File + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
Accumulate.cpp
+
+
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "Globals.h"
25 #include "NEON/Helper.h"
26 #include "NEON/NEAccessor.h"
27 #include "TensorLibrary.h"
28 #include "TypePrinter.h"
29 #include "Utils.h"
30 #include "validation/Datasets.h"
31 #include "validation/Reference.h"
32 #include "validation/Validation.h"
33 
35 #include "arm_compute/core/Types.h"
39 
40 #include "boost_wrapper.h"
41 
42 #include <random>
43 #include <string>
44 
45 using namespace arm_compute;
46 using namespace arm_compute::test;
47 using namespace arm_compute::test::neon;
48 using namespace arm_compute::test::validation;
49 
50 namespace
51 {
58 Tensor compute_accumulate(const TensorShape &shape)
59 {
60  // Create tensors
61  Tensor src = create_tensor(shape, DataType::U8);
62  Tensor dst = create_tensor(shape, DataType::S16);
63 
64  // Create and configure function
65  NEAccumulate acc;
66  acc.configure(&src, &dst);
67 
68  // Allocate tensors
69  src.allocator()->allocate();
70  dst.allocator()->allocate();
71 
72  BOOST_TEST(!src.info()->is_resizable());
73  BOOST_TEST(!dst.info()->is_resizable());
74 
75  // Fill tensors
76  library->fill_tensor_uniform(NEAccessor(src), 0);
77  library->fill_tensor_uniform(NEAccessor(dst), 1);
78 
79  // Compute function
80  acc.run();
81 
82  return dst;
83 }
84 } // namespace
85 
86 #ifndef DOXYGEN_SKIP_THIS
87 BOOST_AUTO_TEST_SUITE(NEON)
88 BOOST_AUTO_TEST_SUITE(Accumulate)
89 
90 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
91 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()),
92  shape)
93 {
94  // Create tensors
95  Tensor src = create_tensor(shape, DataType::U8);
96  Tensor dst = create_tensor(shape, DataType::S16);
97 
98  BOOST_TEST(src.info()->is_resizable());
99  BOOST_TEST(dst.info()->is_resizable());
100 
101  // Create and configure function
102  NEAccumulate acc;
103  acc.configure(&src, &dst);
104 
105  // Validate valid region
106  const ValidRegion valid_region = shape_to_valid_region(shape);
107  validate(src.info()->valid_region(), valid_region);
108  validate(dst.info()->valid_region(), valid_region);
109 
110  // Validate padding
111  const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0);
112  validate(src.info()->padding(), padding);
113  validate(dst.info()->padding(), padding);
114 }
115 
116 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
117 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(),
118  shape)
119 {
120  // Compute function
121  Tensor dst = compute_accumulate(shape);
122 
123  // Compute reference
124  RawTensor ref_dst = Reference::compute_reference_accumulate(shape);
125 
126  // Validate output
127  validate(NEAccessor(dst), ref_dst);
128 }
129 
130 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
131 BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(),
132  shape)
133 {
134  // Compute function
135  Tensor dst = compute_accumulate(shape);
136 
137  // Compute reference
138  RawTensor ref_dst = Reference::compute_reference_accumulate(shape);
139 
140  // Validate output
141  validate(NEAccessor(dst), ref_dst);
142 }
143 
144 BOOST_AUTO_TEST_SUITE_END()
145 BOOST_AUTO_TEST_SUITE_END()
146 #endif
Tensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels=1, int fixed_point_position=0)
Helper to create an empty tensor.
Definition: Helper.h:47
+
Basic function to run NEAccumulateKernel.
Definition: NEAccumulate.h:36
+ +
Simple tensor object that stores elements in a consecutive chunk of memory.
Definition: RawTensor.h:47
+
Shape of a tensor.
Definition: TensorShape.h:38
+ + +
Container for 2D border size.
Definition: Types.h:131
+
int required_padding(int size, int step)
Calculate the required padding given the available size and the required.
Definition: Utils.h:486
+ +
Unknown image format.
+ + + +
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
+
TensorAllocator * allocator()
Return a pointer to the tensor&#39;s allocator.
+ +
virtual bool is_resizable() const =0
Flag indicating whether the size of the tensor can be changed.
+ +
Data set containing small tensor shapes.
+ + +
ValidRegion shape_to_valid_region(TensorShape shape)
Create a valid region covering the enitre tensor shape.
Definition: Utils.h:452
+
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
+
Basic implementation of the tensor interface.
Definition: Tensor.h:37
+ +
virtual PaddingSize padding() const =0
Padding of tensor.
+
1 channel, 1 U8 per channel
+
Data set containing large tensor shapes.
+
std::unique_ptr< TensorLibrary > library
Definition: main.cpp:50
+ +
void run() overridefinal
Run the kernels contained in the function.
+
Accessor implementation for Tensor objects.
Definition: NEAccessor.h:38
+
void validate(const arm_compute::ValidRegion &region, const arm_compute::ValidRegion &reference)
Validate valid regions.
Definition: Validation.cpp:166
+ +
Definition: Datasets.h:47
+ + + + +
void configure(const ITensor *input, ITensor *output)
Set the input and accumulation tensors.
+ +
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor&#39;s metadata.
+
+
+ + + + diff --git a/documentation/_accumulate_squared_8cpp.xhtml b/documentation/_accumulate_squared_8cpp.xhtml new file mode 100644 index 0000000..441f2c5 --- /dev/null +++ b/documentation/_accumulate_squared_8cpp.xhtml @@ -0,0 +1,151 @@ + + + + + + + + +Compute Library: tests/validation/NEON/AccumulateSquared.cpp File Reference + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
AccumulateSquared.cpp File Reference
+
+
+
#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "boost_wrapper.h"
+#include <random>
+#include <string>
+
+

Go to the source code of this file.

+
+
+ + + + diff --git a/documentation/_accumulate_squared_8cpp_source.xhtml b/documentation/_accumulate_squared_8cpp_source.xhtml new file mode 100644 index 0000000..ef39c7f --- /dev/null +++ b/documentation/_accumulate_squared_8cpp_source.xhtml @@ -0,0 +1,176 @@ + + + + + + + + +Compute Library: tests/validation/NEON/AccumulateSquared.cpp Source File + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
AccumulateSquared.cpp
+
+
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "Globals.h"
25 #include "NEON/Helper.h"
26 #include "NEON/NEAccessor.h"
27 #include "TensorLibrary.h"
28 #include "TypePrinter.h"
29 #include "Utils.h"
30 #include "validation/Datasets.h"
31 #include "validation/Reference.h"
32 #include "validation/Validation.h"
33 
35 #include "arm_compute/core/Types.h"
39 
40 #include "boost_wrapper.h"
41 
42 #include <random>
43 #include <string>
44 
45 using namespace arm_compute;
46 using namespace arm_compute::test;
47 using namespace arm_compute::test::neon;
48 using namespace arm_compute::test::validation;
49 
50 namespace
51 {
58 Tensor compute_accumulate_squared(const TensorShape &shape, uint32_t shift)
59 {
60  // Create tensors
61  Tensor src = create_tensor(shape, DataType::U8);
62  Tensor dst = create_tensor(shape, DataType::S16);
63 
64  // Create and configure function
66  acc.configure(&src, shift, &dst);
67 
68  // Allocate tensors
69  src.allocator()->allocate();
70  dst.allocator()->allocate();
71 
72  BOOST_TEST(!src.info()->is_resizable());
73  BOOST_TEST(!dst.info()->is_resizable());
74 
75  // Fill tensors
76  // dst tensor filled with non-negative values
77  library->fill_tensor_uniform(NEAccessor(src), 0);
78  library->fill_tensor_uniform(NEAccessor(dst), 1, static_cast<int16_t>(0), std::numeric_limits<int16_t>::max());
79 
80  // Compute function
81  acc.run();
82 
83  return dst;
84 }
85 } // namespace
86 
87 #ifndef DOXYGEN_SKIP_THIS
88 BOOST_AUTO_TEST_SUITE(NEON)
89 BOOST_AUTO_TEST_SUITE(AccumulateSquared)
90 
91 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
92 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::xrange(0U, 16U),
93  shape, shift)
94 {
95  // Create tensors
96  Tensor src = create_tensor(shape, DataType::U8);
97  Tensor dst = create_tensor(shape, DataType::S16);
98 
99  BOOST_TEST(src.info()->is_resizable());
100  BOOST_TEST(dst.info()->is_resizable());
101 
102  // Create and configure function
104  acc.configure(&src, shift, &dst);
105 
106  // Validate valid region
107  const ValidRegion valid_region = shape_to_valid_region(shape);
108  validate(src.info()->valid_region(), valid_region);
109  validate(dst.info()->valid_region(), valid_region);
110 
111  // Validate padding
112  const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0);
113  validate(src.info()->padding(), padding);
114  validate(dst.info()->padding(), padding);
115 }
116 
117 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
118 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::xrange(0U, 16U),
119  shape, shift)
120 {
121  // Compute function
122  Tensor dst = compute_accumulate_squared(shape, shift);
123 
124  // Compute reference
125  RawTensor ref_dst = Reference::compute_reference_accumulate_squared(shape, shift);
126 
127  // Validate output
128  validate(NEAccessor(dst), ref_dst);
129 }
130 
131 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
132 BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ 0U, 1U, 15U }),
133  shape, shift)
134 {
135  // Compute function
136  Tensor dst = compute_accumulate_squared(shape, shift);
137 
138  // Compute reference
139  RawTensor ref_dst = Reference::compute_reference_accumulate_squared(shape, shift);
140 
141  // Validate output
142  validate(NEAccessor(dst), ref_dst);
143 }
144 
145 BOOST_AUTO_TEST_SUITE_END()
146 BOOST_AUTO_TEST_SUITE_END()
147 #endif
Tensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels=1, int fixed_point_position=0)
Helper to create an empty tensor.
Definition: Helper.h:47
+ +
Simple tensor object that stores elements in a consecutive chunk of memory.
Definition: RawTensor.h:47
+
Shape of a tensor.
Definition: TensorShape.h:38
+ + +
Container for 2D border size.
Definition: Types.h:131
+
int required_padding(int size, int step)
Calculate the required padding given the available size and the required.
Definition: Utils.h:486
+ +
Unknown image format.
+ + + +
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
+
TensorAllocator * allocator()
Return a pointer to the tensor&#39;s allocator.
+ +
virtual bool is_resizable() const =0
Flag indicating whether the size of the tensor can be changed.
+ +
Data set containing small tensor shapes.
+ + +
ValidRegion shape_to_valid_region(TensorShape shape)
Create a valid region covering the enitre tensor shape.
Definition: Utils.h:452
+
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
+
void configure(const ITensor *input, uint32_t shift, ITensor *output)
Set the input and accumulation tensors and the shift value.
+
Basic implementation of the tensor interface.
Definition: Tensor.h:37
+ +
virtual PaddingSize padding() const =0
Padding of tensor.
+
1 channel, 1 U8 per channel
+
Data set containing large tensor shapes.
+
std::unique_ptr< TensorLibrary > library
Definition: main.cpp:50
+ +
void run() overridefinal
Run the kernels contained in the function.
+
Accessor implementation for Tensor objects.
Definition: NEAccessor.h:38
+
void validate(const arm_compute::ValidRegion &region, const arm_compute::ValidRegion &reference)
Validate valid regions.
Definition: Validation.cpp:166
+ +
Definition: Datasets.h:47
+ +
Basic function to run NEAccumulateSquaredKernel.
Definition: NEAccumulate.h:62
+
fixed_point< T > max(fixed_point< T > x, fixed_point< T > y)
Definition: FixedPoint.h:880
+ + + + +
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor&#39;s metadata.
+
+
+ + + + diff --git a/documentation/_accumulate_weighted_8cpp.xhtml b/documentation/_accumulate_weighted_8cpp.xhtml new file mode 100644 index 0000000..2134681 --- /dev/null +++ b/documentation/_accumulate_weighted_8cpp.xhtml @@ -0,0 +1,151 @@ + + + + + + + + +Compute Library: tests/validation/NEON/AccumulateWeighted.cpp File Reference + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
AccumulateWeighted.cpp File Reference
+
+
+
#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "boost_wrapper.h"
+#include <random>
+#include <string>
+
+

Go to the source code of this file.

+
+
+ + + + diff --git a/documentation/_accumulate_weighted_8cpp_source.xhtml b/documentation/_accumulate_weighted_8cpp_source.xhtml new file mode 100644 index 0000000..1b99fe7 --- /dev/null +++ b/documentation/_accumulate_weighted_8cpp_source.xhtml @@ -0,0 +1,175 @@ + + + + + + + + +Compute Library: tests/validation/NEON/AccumulateWeighted.cpp Source File + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
AccumulateWeighted.cpp
+
+
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "Globals.h"
25 #include "NEON/Helper.h"
26 #include "NEON/NEAccessor.h"
27 #include "TensorLibrary.h"
28 #include "TypePrinter.h"
29 #include "Utils.h"
30 #include "validation/Datasets.h"
31 #include "validation/Reference.h"
32 #include "validation/Validation.h"
33 
35 #include "arm_compute/core/Types.h"
39 
40 #include "boost_wrapper.h"
41 
42 #include <random>
43 #include <string>
44 
45 using namespace arm_compute;
46 using namespace arm_compute::test;
47 using namespace arm_compute::test::neon;
48 using namespace arm_compute::test::validation;
49 
50 namespace
51 {
58 Tensor compute_accumulate_weighted(const TensorShape &shape, float alpha)
59 {
60  // Create tensors
61  Tensor src = create_tensor(shape, DataType::U8);
62  Tensor dst = create_tensor(shape, DataType::U8);
63 
64  // Create and configure function
66  acc.configure(&src, alpha, &dst);
67 
68  // Allocate tensors
69  src.allocator()->allocate();
70  dst.allocator()->allocate();
71 
72  BOOST_TEST(!src.info()->is_resizable());
73  BOOST_TEST(!dst.info()->is_resizable());
74 
75  // Fill tensors
76  library->fill_tensor_uniform(NEAccessor(src), 0);
77  library->fill_tensor_uniform(NEAccessor(dst), 1);
78 
79  // Compute function
80  acc.run();
81 
82  return dst;
83 }
84 } // namespace
85 
86 #ifndef DOXYGEN_SKIP_THIS
87 BOOST_AUTO_TEST_SUITE(NEON)
88 BOOST_AUTO_TEST_SUITE(AccumulateWeighted)
89 
90 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
91 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ 0.f, 0.5f, 1.f }),
92  shape, alpha)
93 {
94  // Create tensors
95  Tensor src = create_tensor(shape, DataType::U8);
96  Tensor dst = create_tensor(shape, DataType::U8);
97 
98  BOOST_TEST(src.info()->is_resizable());
99  BOOST_TEST(dst.info()->is_resizable());
100 
101  // Create and configure function
103  acc.configure(&src, alpha, &dst);
104 
105  // Validate valid region
106  const ValidRegion valid_region = shape_to_valid_region(shape);
107  validate(src.info()->valid_region(), valid_region);
108  validate(dst.info()->valid_region(), valid_region);
109 
110  // Validate padding
111  const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0);
112  validate(src.info()->padding(), padding);
113  validate(dst.info()->padding(), padding);
114 }
115 
116 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
117 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ 0.f, 0.5f, 1.f }),
118  shape, alpha)
119 {
120  // Compute function
121  Tensor dst = compute_accumulate_weighted(shape, alpha);
122 
123  // Compute reference
124  RawTensor ref_dst = Reference::compute_reference_accumulate_weighted(shape, alpha);
125 
126  // Validate output
127  validate(NEAccessor(dst), ref_dst);
128 }
129 
130 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
131 BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ 0.f, 0.5f, 1.f }),
132  shape, alpha)
133 {
134  // Compute function
135  Tensor dst = compute_accumulate_weighted(shape, alpha);
136 
137  // Compute reference
138  RawTensor ref_dst = Reference::compute_reference_accumulate_weighted(shape, alpha);
139 
140  // Validate output
141  validate(NEAccessor(dst), ref_dst);
142 }
143 
144 BOOST_AUTO_TEST_SUITE_END()
145 BOOST_AUTO_TEST_SUITE_END()
146 #endif
Tensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels=1, int fixed_point_position=0)
Helper to create an empty tensor.
Definition: Helper.h:47
+ +
Simple tensor object that stores elements in a consecutive chunk of memory.
Definition: RawTensor.h:47
+
Shape of a tensor.
Definition: TensorShape.h:38
+ + +
Container for 2D border size.
Definition: Types.h:131
+
int required_padding(int size, int step)
Calculate the required padding given the available size and the required.
Definition: Utils.h:486
+ +
Unknown image format.
+ + + +
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
+
TensorAllocator * allocator()
Return a pointer to the tensor&#39;s allocator.
+
T x() const
Alias to access the size of the first dimension.
Definition: Dimensions.h:81
+ +
virtual bool is_resizable() const =0
Flag indicating whether the size of the tensor can be changed.
+ +
Data set containing small tensor shapes.
+ + +
ValidRegion shape_to_valid_region(TensorShape shape)
Create a valid region covering the enitre tensor shape.
Definition: Utils.h:452
+
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
+
Basic implementation of the tensor interface.
Definition: Tensor.h:37
+ +
virtual PaddingSize padding() const =0
Padding of tensor.
+
Data set containing large tensor shapes.
+
std::unique_ptr< TensorLibrary > library
Definition: main.cpp:50
+
Basic function to run NEAccumulateWeightedKernel.
Definition: NEAccumulate.h:48
+ +
void run() overridefinal
Run the kernels contained in the function.
+
Accessor implementation for Tensor objects.
Definition: NEAccessor.h:38
+
void validate(const arm_compute::ValidRegion &region, const arm_compute::ValidRegion &reference)
Validate valid regions.
Definition: Validation.cpp:166
+ +
Definition: Datasets.h:47
+ + + + + +
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor&#39;s metadata.
+
void configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16=false)
Set the input and accumulation tensors, and the scale value.
+
+
+ + + + diff --git a/documentation/_activation_function_dataset_8h.xhtml b/documentation/_activation_function_dataset_8h.xhtml new file mode 100644 index 0000000..51542fb --- /dev/null +++ b/documentation/_activation_function_dataset_8h.xhtml @@ -0,0 +1,153 @@ + + + + + + + + +Compute Library: tests/dataset/ActivationFunctionDataset.h File Reference + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+ +
+
ActivationFunctionDataset.h File Reference
+
+
+ +

Go to the source code of this file.

+ + + + + +

+Data Structures

class  ActivationFunctions
 Data set containing all possible activation functions. More...
 
+ + + + + +

+Namespaces

 arm_compute
 
 arm_compute::test
 
+
+
+ + + + diff --git a/documentation/_activation_function_dataset_8h_source.xhtml b/documentation/_activation_function_dataset_8h_source.xhtml new file mode 100644 index 0000000..b4faca3 --- /dev/null +++ b/documentation/_activation_function_dataset_8h_source.xhtml @@ -0,0 +1,149 @@ + + + + + + + + +Compute Library: tests/dataset/ActivationFunctionDataset.h Source File + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
ActivationFunctionDataset.h
+
+
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef __ARM_COMPUTE_TEST_DATASET_ACTIVATION_FUNCTION_DATASET_H__
25 #define __ARM_COMPUTE_TEST_DATASET_ACTIVATION_FUNCTION_DATASET_H__
26 
27 #include "arm_compute/core/Types.h"
28 #include "dataset/GenericDataset.h"
29 
30 #ifdef BOOST
31 #include "boost_wrapper.h"
32 #endif
33 
34 namespace arm_compute
35 {
36 namespace test
37 {
43 class ActivationFunctions final : public GenericDataset<ActivationLayerInfo::ActivationFunction, 9>
44 {
45 public:
48  {
58  }
59  {
60  }
61 
62  ~ActivationFunctions() = default;
63 };
64 } // namespace test
65 } // namespace arm_compute
66 #endif //__ARM_COMPUTE_TEST_DATASET_ACTIVATION_FUNCTION_DATASET_H__
Abstract data set containing multiple objects T.
+ + + + + + + + + + + + + + + +
Data set containing all possible activation functions.
+
+
+ + + + diff --git a/documentation/_activation_layer_8h.xhtml b/documentation/_activation_layer_8h.xhtml new file mode 100644 index 0000000..19163e9 --- /dev/null +++ b/documentation/_activation_layer_8h.xhtml @@ -0,0 +1,156 @@ + + + + + + + + +Compute Library: tests/benchmark/common/ActivationLayer.h File Reference + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+ +
+
ActivationLayer.h File Reference
+
+
+
#include "TensorLibrary.h"
+#include "Utils.h"
+#include "dataset/ActivationLayerDataset.h"
+#include <memory>
+
+

Go to the source code of this file.

+ + + + +

+Data Structures

class  ActivationLayer< DataSet, TensorType, Accessor, Function, dt >
 
+ + + + + + + +

+Namespaces

 arm_compute
 
 arm_compute::test
 
 arm_compute::test::benchmark
 
+
+
+ + + + diff --git a/documentation/_activation_layer_8h_source.xhtml b/documentation/_activation_layer_8h_source.xhtml new file mode 100644 index 0000000..c4a35a7 --- /dev/null +++ b/documentation/_activation_layer_8h_source.xhtml @@ -0,0 +1,150 @@ + + + + + + + + +Compute Library: tests/benchmark/common/ActivationLayer.h Source File + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
ActivationLayer.h
+
+
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef __ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__
25 #define __ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__
26 
27 #include "TensorLibrary.h"
28 #include "Utils.h"
30 
31 #include <memory>
32 
33 using namespace arm_compute;
34 using namespace arm_compute::test;
35 using namespace arm_compute::test::benchmark;
36 
37 namespace arm_compute
38 {
39 namespace test
40 {
41 namespace benchmark
42 {
43 template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt = DataType::F32>
44 class ActivationLayer : public ::benchmark::Fixture
45 {
46 public:
47  void SetUp(::benchmark::State &state) override
48  {
49  profiler.add(std::make_shared<WallClockTimer>());
50 
51  const ActivationLayerDataObject act_obj = *(DataSet().begin() + state.range(0));
52 
53  // Set batched in source and destination shapes
54  const unsigned int batches = state.range(1);
55  const unsigned int fixed_point_position = 4;
56  TensorShape shape = act_obj.shape;
57  shape.set(shape.num_dimensions(), batches);
58 
59  // Create tensors
60  src = create_tensor(shape, dt, 1, fixed_point_position);
61  dst = create_tensor(shape, dt, 1, fixed_point_position);
62 
63  // Create and configure function
64  act_layer.configure(&src, &dst, act_obj.info);
65 
66  // Allocate tensors
67  src.allocator()->allocate();
68  dst.allocator()->allocate();
69 
70  // Fill tensors
71  library->fill_tensor_uniform(Accessor(src), 0);
72  }
73 
74  void TearDown(::benchmark::State &state) override
75  {
76  src.allocator()->free();
77  dst.allocator()->free();
78 
79  profiler.submit(state);
80  }
81 
82  Function act_layer{};
83  Profiler profiler{};
84 
85 private:
86  TensorType src{};
87  TensorType dst{};
88 };
89 } // namespace benchmark
90 } // namespace test
91 } // namespace arm_compute
92 #endif //__ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__
Shape of a tensor.
Definition: TensorShape.h:38
+ + + + + + +
::benchmark::State & state
+ +
void set(size_t dimension, size_t value)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:74
+ +
std::unique_ptr< TensorLibrary > library
Definition: main.cpp:50
+
CLTensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels=1, int fixed_point_position=0)
Helper to create an empty tensor.
Definition: Helper.h:47
+
unsigned int num_dimensions() const
Returns the effective dimensionality of the tensor.
Definition: Dimensions.h:109
+
void TearDown(::benchmark::State &state) override
+ +
void SetUp(::benchmark::State &state) override
+ +
+
+ + + + diff --git a/documentation/_activation_layer_dataset_8h.js b/documentation/_activation_layer_dataset_8h.js new file mode 100644 index 0000000..7ac4434 --- /dev/null +++ b/documentation/_activation_layer_dataset_8h.js @@ -0,0 +1,8 @@ +var _activation_layer_dataset_8h = +[ + [ "ActivationLayerDataObject", "classarm__compute_1_1test_1_1_activation_layer_data_object.xhtml", "classarm__compute_1_1test_1_1_activation_layer_data_object" ], + [ "AlexNetActivationLayerDataset", "classarm__compute_1_1test_1_1_alex_net_activation_layer_dataset.xhtml", "classarm__compute_1_1test_1_1_alex_net_activation_layer_dataset" ], + [ "LeNet5ActivationLayerDataset", "classarm__compute_1_1test_1_1_le_net5_activation_layer_dataset.xhtml", "classarm__compute_1_1test_1_1_le_net5_activation_layer_dataset" ], + [ "GoogLeNetActivationLayerDataset", "classarm__compute_1_1test_1_1_goog_le_net_activation_layer_dataset.xhtml", "classarm__compute_1_1test_1_1_goog_le_net_activation_layer_dataset" ], + [ "ActivationLayerDataset", "_activation_layer_dataset_8h.xhtml#aa1a629d971f45dc8c4cb7ec2d5c8728e", null ] +]; \ No newline at end of file diff --git a/documentation/_activation_layer_dataset_8h.xhtml b/documentation/_activation_layer_dataset_8h.xhtml new file mode 100644 index 0000000..d74b6ea --- /dev/null +++ b/documentation/_activation_layer_dataset_8h.xhtml @@ -0,0 +1,169 @@ + + + + + + + + +Compute Library: tests/dataset/ActivationLayerDataset.h File Reference + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+ +
+
ActivationLayerDataset.h File Reference
+
+
+
#include "TypePrinter.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "dataset/GenericDataset.h"
+#include <sstream>
+#include <type_traits>
+
+

Go to the source code of this file.

+ + + + + + + + + + +

+Data Structures

class  ActivationLayerDataObject
 
class  AlexNetActivationLayerDataset
 
class  LeNet5ActivationLayerDataset
 
class  GoogLeNetActivationLayerDataset
 
+ + + + + +

+Namespaces

 arm_compute
 
 arm_compute::test
 
+ + + + +

+Typedefs

template<unsigned int Size>
using ActivationLayerDataset = GenericDataset< ActivationLayerDataObject, Size >
 
+
+
+ + + + diff --git a/documentation/_activation_layer_dataset_8h_source.xhtml b/documentation/_activation_layer_dataset_8h_source.xhtml new file mode 100644 index 0000000..6996ac0 --- /dev/null +++ b/documentation/_activation_layer_dataset_8h_source.xhtml @@ -0,0 +1,153 @@ + + + + + + + + +Compute Library: tests/dataset/ActivationLayerDataset.h Source File + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
ActivationLayerDataset.h
+
+
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef __ARM_COMPUTE_TEST_DATASET_ACTIVATION_LAYER_DATASET_H__
25 #define __ARM_COMPUTE_TEST_DATASET_ACTIVATION_LAYER_DATASET_H__
26 
27 #include "TypePrinter.h"
28 
30 #include "arm_compute/core/Types.h"
31 #include "dataset/GenericDataset.h"
32 
33 #include <sstream>
34 #include <type_traits>
35 
36 #ifdef BOOST
37 #include "boost_wrapper.h"
38 #endif
39 
40 namespace arm_compute
41 {
42 namespace test
43 {
45 {
46 public:
47  operator std::string() const
48  {
49  std::stringstream ss;
50  ss << "ActivationLayer";
51  ss << "_I" << shape;
52  ss << "_F_" << info.activation();
53  return ss.str();
54  }
55 
56 public:
59 };
60 
61 template <unsigned int Size>
63 
65 {
66 public:
69  {
75  }
76  {
77  }
78 
79  ~AlexNetActivationLayerDataset() = default;
80 };
81 
83 {
84 public:
87  {
89  }
90  {
91  }
92 
93  ~LeNet5ActivationLayerDataset() = default;
94 };
95 
97 {
98 public:
101  {
102  // conv1/relu_7x7
104  // conv2/relu_3x3_reduce
106  // conv2/relu_3x3
108  // inception_3a/relu_1x1, inception_3b/relu_pool_proj
110  // inception_3a/relu_3x3_reduce, inception_3b/relu_5x5
112  // inception_3a/relu_3x3, inception_3b/relu_1x1, inception_3b/relu_3x3_reduce
114  // inception_3a/relu_5x5_reduce
116  // inception_3a/relu_5x5, inception_3a/relu_pool_proj, inception_3b/relu_5x5_reduce
118  // inception_3b/relu_3x3
120  // inception_4a/relu_1x1
122  // inception_4a/relu_3x3_reduce
124  // inception_4a/relu_3x3
126  // inception_4a/relu_5x5_reduce
128  // inception_4a/relu_5x5
130  // inception_4a/relu_pool_proj, inception_4b/relu_5x5, inception_4b/relu_pool_proj, inception_4c/relu_5x5, inception_4c/relu_pool_proj, inception_4d/relu_5x5, inception_4d/relu_pool_proj
132  // inception_4b/relu_1x1, inception_4e/relu_3x3_reduce
134  // inception_4b/relu_3x3_reduce, inception_4d/relu_1x1
136  // inception_4b/relu_3x3
138  // inception_4b/relu_5x5_reduce, inception_4c/relu_5x5_reduce
140  // inception_4c/relu_1x1, inception_4c/relu_3x3_reduce, inception_4e/relu_5x5, inception_4e/relu_pool_proj
142  // inception_4c/relu_3x3, inception_4e/relu_1x1
144  // inception_4d/relu_3x3_reduce
146  // inception_4d/relu_3x3
148  // inception_4d/relu_5x5_reduce, inception_4e/relu_5x5_reduce
150  // inception_4e/relu_3x3
152  // inception_5a/relu_1x1
154  // inception_5a/relu_3x3_reduce
156  // inception_5a/relu_3x3
158  // inception_5a/relu_5x5_reduce
160  // inception_5a/relu_5x5, inception_5a/relu_pool_proj, inception_5b/relu_5x5, inception_5b/relu_pool_proj
162  // inception_5b/relu_1x1, inception_5b/relu_3x3
164  // inception_5b/relu_3x3_reduce
166  // inception_5b/relu_5x5_reduce
168  }
169  {
170  }
171 
172  ~GoogLeNetActivationLayerDataset() = default;
173 };
174 
175 } // namespace test
176 } // namespace arm_compute
177 #endif //__ARM_COMPUTE_TEST_DATASET_ACTIVATION_LAYER_DATASET_H__
Shape of a tensor.
Definition: TensorShape.h:38
+ +
Abstract data set containing multiple objects T.
+ +
ActivationFunction activation() const
Definition: Types.h:486
+ +
Activation Layer Information class.
Definition: Types.h:458
+ + + + + + + + + + + + + + +
+
+ + + + diff --git a/documentation/_arithmetic_addition_8cpp.xhtml b/documentation/_arithmetic_addition_8cpp.xhtml new file mode 100644 index 0000000..95d0db6 --- /dev/null +++ b/documentation/_arithmetic_addition_8cpp.xhtml @@ -0,0 +1,151 @@ + + + + + + + + +Compute Library: tests/validation/NEON/ArithmeticAddition.cpp File Reference + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
ArithmeticAddition.cpp File Reference
+
+
+
#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "boost_wrapper.h"
+#include <random>
+#include <string>
+
+

Go to the source code of this file.

+
+
+ + + + diff --git a/documentation/_arithmetic_addition_8cpp_source.xhtml b/documentation/_arithmetic_addition_8cpp_source.xhtml new file mode 100644 index 0000000..31fe3cb --- /dev/null +++ b/documentation/_arithmetic_addition_8cpp_source.xhtml @@ -0,0 +1,182 @@ + + + + + + + + +Compute Library: tests/validation/NEON/ArithmeticAddition.cpp Source File + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
ArithmeticAddition.cpp
+
+
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "Globals.h"
25 #include "NEON/Helper.h"
26 #include "NEON/NEAccessor.h"
27 #include "TensorLibrary.h"
28 #include "TypePrinter.h"
29 #include "Utils.h"
30 #include "validation/Datasets.h"
31 #include "validation/Reference.h"
32 #include "validation/Validation.h"
33 
35 #include "arm_compute/core/Types.h"
39 
40 #include "boost_wrapper.h"
41 
42 #include <random>
43 #include <string>
44 
45 using namespace arm_compute;
46 using namespace arm_compute::test;
47 using namespace arm_compute::test::neon;
48 using namespace arm_compute::test::validation;
49 
50 namespace
51 {
62 Tensor compute_arithmetic_addition(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, ConvertPolicy policy)
63 {
64  // Create tensors
65  Tensor src1 = create_tensor(shape, dt_in0);
66  Tensor src2 = create_tensor(shape, dt_in1);
67  Tensor dst = create_tensor(shape, dt_out);
68 
69  // Create and configure function
71  add.configure(&src1, &src2, &dst, policy);
72 
73  // Allocate tensors
74  src1.allocator()->allocate();
75  src2.allocator()->allocate();
76  dst.allocator()->allocate();
77 
78  BOOST_TEST(!src1.info()->is_resizable());
79  BOOST_TEST(!src2.info()->is_resizable());
80  BOOST_TEST(!dst.info()->is_resizable());
81 
82  // Fill tensors
83  library->fill_tensor_uniform(NEAccessor(src1), 0);
84  library->fill_tensor_uniform(NEAccessor(src2), 1);
85 
86  // Compute function
87  add.run();
88 
89  return dst;
90 }
91 
92 void validate_configuration(const Tensor &src1, const Tensor &src2, Tensor &dst, TensorShape shape, ConvertPolicy policy)
93 {
94  BOOST_TEST(src1.info()->is_resizable());
95  BOOST_TEST(src2.info()->is_resizable());
96  BOOST_TEST(dst.info()->is_resizable());
97 
98  // Create and configure function
100  add.configure(&src1, &src2, &dst, policy);
101 
102  // Validate valid region
103  const ValidRegion valid_region = shape_to_valid_region(shape);
104  validate(src1.info()->valid_region(), valid_region);
105  validate(src2.info()->valid_region(), valid_region);
106  validate(dst.info()->valid_region(), valid_region);
107 
108  // Validate padding
109  const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0);
110  validate(src1.info()->padding(), padding);
111  validate(src2.info()->padding(), padding);
112  validate(dst.info()->padding(), padding);
113 }
114 } // namespace
115 
116 #ifndef DOXYGEN_SKIP_THIS
117 BOOST_AUTO_TEST_SUITE(NEON)
118 BOOST_AUTO_TEST_SUITE(ArithmeticAddition)
119 
120 BOOST_AUTO_TEST_SUITE(U8)
121 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
122 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
123  shape, policy)
124 {
125  // Create tensors
126  Tensor src1 = create_tensor(shape, DataType::U8);
127  Tensor src2 = create_tensor(shape, DataType::U8);
128  Tensor dst = create_tensor(shape, DataType::U8);
129 
130  validate_configuration(src1, src2, dst, shape, policy);
131 }
132 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
133 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
134  shape, policy)
135 {
136  // Compute function
137  Tensor dst = compute_arithmetic_addition(shape, DataType::U8, DataType::U8, DataType::U8, policy);
138 
139  // Compute reference
140  RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, DataType::U8, DataType::U8, DataType::U8, policy);
141 
142  // Validate output
143  validate(NEAccessor(dst), ref_dst);
144 }
145 BOOST_AUTO_TEST_SUITE_END()
146 
147 BOOST_AUTO_TEST_SUITE(S16)
148 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
149 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
150  shape, dt, policy)
151 {
152  // Create tensors
153  Tensor src1 = create_tensor(shape, dt);
154  Tensor src2 = create_tensor(shape, DataType::S16);
155  Tensor dst = create_tensor(shape, DataType::S16);
156 
157  validate_configuration(src1, src2, dst, shape, policy);
158 }
159 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
160 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
161  shape, dt, policy)
162 {
163  // Compute function
164  Tensor dst = compute_arithmetic_addition(shape, dt, DataType::S16, DataType::S16, policy);
165 
166  // Compute reference
167  RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, dt, DataType::S16, DataType::S16, policy);
168 
169  // Validate output
170  validate(NEAccessor(dst), ref_dst);
171 }
172 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
173 BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
174  shape, dt, policy)
175 {
176  // Compute function
177  Tensor dst = compute_arithmetic_addition(shape, dt, DataType::S16, DataType::S16, policy);
178 
179  // Compute reference
180  RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, dt, DataType::S16, DataType::S16, policy);
181 
182  // Validate output
183  validate(NEAccessor(dst), ref_dst);
184 }
185 BOOST_AUTO_TEST_SUITE_END()
186 
187 BOOST_AUTO_TEST_SUITE(F32)
188 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
189 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
190  shape, policy)
191 {
192  // Create tensors
193  Tensor src1 = create_tensor(shape, DataType::F32);
194  Tensor src2 = create_tensor(shape, DataType::F32);
195  Tensor dst = create_tensor(shape, DataType::F32);
196 
197  validate_configuration(src1, src2, dst, shape, policy);
198 }
199 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
200 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape)
201 {
202  // Compute function
203  Tensor dst = compute_arithmetic_addition(shape, DataType::F32, DataType::F32, DataType::F32, ConvertPolicy::WRAP);
204 
205  // Compute reference
206  RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, DataType::F32, DataType::F32, DataType::F32, ConvertPolicy::WRAP);
207 
208  // Validate output
209  validate(NEAccessor(dst), ref_dst);
210 }
211 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
212 BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
213  shape, policy)
214 {
215  // Compute function
216  Tensor dst = compute_arithmetic_addition(shape, DataType::F32, DataType::F32, DataType::F32, policy);
217 
218  // Compute reference
219  RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, DataType::F32, DataType::F32, DataType::F32, policy);
220 
221  // Validate output
222  validate(NEAccessor(dst), ref_dst);
223 }
224 BOOST_AUTO_TEST_SUITE_END()
225 
226 BOOST_AUTO_TEST_SUITE_END()
227 BOOST_AUTO_TEST_SUITE_END()
228 #endif
Tensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels=1, int fixed_point_position=0)
Helper to create an empty tensor.
Definition: Helper.h:47
+ +
Simple tensor object that stores elements in a consecutive chunk of memory.
Definition: RawTensor.h:47
+
Shape of a tensor.
Definition: TensorShape.h:38
+ +
Basic function to run NEArithmeticAdditionKernel.
+ +
Container for 2D border size.
Definition: Types.h:131
+
int required_padding(int size, int step)
Calculate the required padding given the available size and the required.
Definition: Utils.h:486
+ +
Unknown image format.
+ +
1 channel, 1 F16 per channel
+ + +
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
+
TensorAllocator * allocator()
Return a pointer to the tensor&#39;s allocator.
+
T x() const
Alias to access the size of the first dimension.
Definition: Dimensions.h:81
+
fixed_point< T > add(fixed_point< T > x, fixed_point< T > y)
Definition: FixedPoint.h:885
+
virtual bool is_resizable() const =0
Flag indicating whether the size of the tensor can be changed.
+ +
Data set containing small tensor shapes.
+ + +
ValidRegion shape_to_valid_region(TensorShape shape)
Create a valid region covering the enitre tensor shape.
Definition: Utils.h:452
+
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
+ +
Basic implementation of the tensor interface.
Definition: Tensor.h:37
+ +
virtual PaddingSize padding() const =0
Padding of tensor.
+
1 channel, 1 U8 per channel
+
Data set containing large tensor shapes.
+
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
Initialise the kernel&#39;s inputs, output and conversion policy.
+
std::unique_ptr< TensorLibrary > library
Definition: main.cpp:50
+ +
void run() overridefinal
Run the kernels contained in the function.
+ +
Accessor implementation for Tensor objects.
Definition: NEAccessor.h:38
+
void validate(const arm_compute::ValidRegion &region, const arm_compute::ValidRegion &reference)
Validate valid regions.
Definition: Validation.cpp:166
+ +
Definition: Datasets.h:47
+ + + + + + +
DataType
Available data types.
Definition: Types.h:60
+
ConvertPolicy
Policy to handle overflow.
Definition: Types.h:204
+
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor&#39;s metadata.
+
+
+ + + + diff --git a/documentation/_arithmetic_subtraction_8cpp.xhtml b/documentation/_arithmetic_subtraction_8cpp.xhtml new file mode 100644 index 0000000..b8a6a16 --- /dev/null +++ b/documentation/_arithmetic_subtraction_8cpp.xhtml @@ -0,0 +1,151 @@ + + + + + + + + +Compute Library: tests/validation/NEON/ArithmeticSubtraction.cpp File Reference + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
ArithmeticSubtraction.cpp File Reference
+
+
+
#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "boost_wrapper.h"
+#include <random>
+#include <string>
+
+

Go to the source code of this file.

+
+
+ + + + diff --git a/documentation/_arithmetic_subtraction_8cpp_source.xhtml b/documentation/_arithmetic_subtraction_8cpp_source.xhtml new file mode 100644 index 0000000..4628817 --- /dev/null +++ b/documentation/_arithmetic_subtraction_8cpp_source.xhtml @@ -0,0 +1,182 @@ + + + + + + + + +Compute Library: tests/validation/NEON/ArithmeticSubtraction.cpp Source File + + + + + + + + + + + + + + + + +
+
+ + + + + + +
+
Compute Library +  v17.06 +
+
+
+ + + + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
ArithmeticSubtraction.cpp
+
+
+Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "Globals.h"
25 #include "NEON/Helper.h"
26 #include "NEON/NEAccessor.h"
27 #include "TensorLibrary.h"
28 #include "TypePrinter.h"
29 #include "Utils.h"
30 #include "validation/Datasets.h"
31 #include "validation/Reference.h"
32 #include "validation/Validation.h"
33 
35 #include "arm_compute/core/Types.h"
39 
40 #include "boost_wrapper.h"
41 
42 #include <random>
43 #include <string>
44 
45 using namespace arm_compute;
46 using namespace arm_compute::test;
47 using namespace arm_compute::test::neon;
48 using namespace arm_compute::test::validation;
49 
50 namespace
51 {
62 Tensor compute_arithmetic_subtraction(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, ConvertPolicy policy)
63 {
64  // Create tensors
65  Tensor src1 = create_tensor(shape, dt_in0);
66  Tensor src2 = create_tensor(shape, dt_in1);
67  Tensor dst = create_tensor(shape, dt_out);
68 
69  // Create and configure function
71  sub.configure(&src1, &src2, &dst, policy);
72 
73  // Allocate tensors
74  src1.allocator()->allocate();
75  src2.allocator()->allocate();
76  dst.allocator()->allocate();
77 
78  BOOST_TEST(!src1.info()->is_resizable());
79  BOOST_TEST(!src2.info()->is_resizable());
80  BOOST_TEST(!dst.info()->is_resizable());
81 
82  // Fill tensors
83  library->fill_tensor_uniform(NEAccessor(src1), 0);
84  library->fill_tensor_uniform(NEAccessor(src2), 1);
85 
86  // Compute function
87  sub.run();
88 
89  return dst;
90 }
91 
92 void validate_configuration(const Tensor &src1, const Tensor &src2, Tensor &dst, TensorShape shape, ConvertPolicy policy)
93 {
94  BOOST_TEST(src1.info()->is_resizable());
95  BOOST_TEST(src2.info()->is_resizable());
96  BOOST_TEST(dst.info()->is_resizable());
97 
98  // Create and configure function
100  sub.configure(&src1, &src2, &dst, policy);
101 
102  // Validate valid region
103  const ValidRegion valid_region = shape_to_valid_region(shape);
104  validate(src1.info()->valid_region(), valid_region);
105  validate(src2.info()->valid_region(), valid_region);
106  validate(dst.info()->valid_region(), valid_region);
107 
108  // Validate padding
109  const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0);
110  validate(src1.info()->padding(), padding);
111  validate(src2.info()->padding(), padding);
112  validate(dst.info()->padding(), padding);
113 }
114 } // namespace
115 
116 #ifndef DOXYGEN_SKIP_THIS
117 BOOST_AUTO_TEST_SUITE(NEON)
118 BOOST_AUTO_TEST_SUITE(ArithmeticSubtraction)
119 
120 BOOST_AUTO_TEST_SUITE(U8)
121 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
122 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
123  shape, policy)
124 {
125  // Create tensors
126  Tensor src1 = create_tensor(shape, DataType::U8);
127  Tensor src2 = create_tensor(shape, DataType::U8);
128  Tensor dst = create_tensor(shape, DataType::U8);
129 
130  validate_configuration(src1, src2, dst, shape, policy);
131 }
132 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
133 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
134  shape, policy)
135 {
136  // Compute function
137  Tensor dst = compute_arithmetic_subtraction(shape, DataType::U8, DataType::U8, DataType::U8, policy);
138 
139  // Compute reference
140  RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, DataType::U8, DataType::U8, DataType::U8, policy);
141 
142  // Validate output
143  validate(NEAccessor(dst), ref_dst);
144 }
145 BOOST_AUTO_TEST_SUITE_END()
146 
147 BOOST_AUTO_TEST_SUITE(S16)
148 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
149 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
150  shape, dt, policy)
151 {
152  // Create tensors
153  Tensor src1 = create_tensor(shape, dt);
154  Tensor src2 = create_tensor(shape, DataType::S16);
155  Tensor dst = create_tensor(shape, DataType::S16);
156 
157  validate_configuration(src1, src2, dst, shape, policy);
158 }
159 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
160 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
161  shape, dt, policy)
162 {
163  // Compute function
164  Tensor dst = compute_arithmetic_subtraction(shape, dt, DataType::S16, DataType::S16, policy);
165 
166  // Compute reference
167  RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, dt, DataType::S16, DataType::S16, policy);
168 
169  // Validate output
170  validate(NEAccessor(dst), ref_dst);
171 }
172 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
173 BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
174  shape, dt, policy)
175 {
176  // Compute function
177  Tensor dst = compute_arithmetic_subtraction(shape, dt, DataType::S16, DataType::S16, policy);
178 
179  // Compute reference
180  RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, dt, DataType::S16, DataType::S16, policy);
181 
182  // Validate output
183  validate(NEAccessor(dst), ref_dst);
184 }
185 BOOST_AUTO_TEST_SUITE_END()
186 
187 BOOST_AUTO_TEST_SUITE(F32)
188 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
189 BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
190  shape, policy)
191 {
192  // Create tensors
193  Tensor src1 = create_tensor(shape, DataType::F32);
194  Tensor src2 = create_tensor(shape, DataType::F32);
195  Tensor dst = create_tensor(shape, DataType::F32);
196 
197  validate_configuration(src1, src2, dst, shape, policy);
198 }
199 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
200 BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape)
201 {
202  // Compute function
203  Tensor dst = compute_arithmetic_subtraction(shape, DataType::F32, DataType::F32, DataType::F32, ConvertPolicy::WRAP);
204 
205  // Compute reference
206  RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, DataType::F32, DataType::F32, DataType::F32, ConvertPolicy::WRAP);
207 
208  // Validate output
209  validate(NEAccessor(dst), ref_dst);
210 }
211 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
212 BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }),
213  shape, policy)
214 {
215  // Compute function
216  Tensor dst = compute_arithmetic_subtraction(shape, DataType::F32, DataType::F32, DataType::F32, policy);
217 
218  // Compute reference
219  RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, DataType::F32, DataType::F32, DataType::F32, policy);
220 
221  // Validate output
222  validate(NEAccessor(dst), ref_dst);
223 }
224 BOOST_AUTO_TEST_SUITE_END()
225 
226 BOOST_AUTO_TEST_SUITE_END()
227 BOOST_AUTO_TEST_SUITE_END()
228 #endif
Tensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels=1, int fixed_point_position=0)
Helper to create an empty tensor.
Definition: Helper.h:47
+ +
Simple tensor object that stores elements in a consecutive chunk of memory.
Definition: RawTensor.h:47
+
Shape of a tensor.
Definition: TensorShape.h:38
+ + + +
Container for 2D border size.
Definition: Types.h:131
+
int required_padding(int size, int step)
Calculate the required padding given the available size and the required.
Definition: Utils.h:486
+ +
Unknown image format.
+ +
1 channel, 1 F16 per channel
+
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
Initialise the kernel&#39;s inputs, output and conversion policy.
+ + +
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
+
Basic function to run NEArithmeticSubtractionKernel.
+
TensorAllocator * allocator()
Return a pointer to the tensor&#39;s allocator.
+
T x() const
Alias to access the size of the first dimension.
Definition: Dimensions.h:81
+
virtual bool is_resizable() const =0
Flag indicating whether the size of the tensor can be changed.
+ +
Data set containing small tensor shapes.
+ + +
ValidRegion shape_to_valid_region(TensorShape shape)
Create a valid region covering the enitre tensor shape.
Definition: Utils.h:452
+
fixed_point< T > sub(fixed_point< T > x, fixed_point< T > y)
Definition: FixedPoint.h:890
+
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
+
Basic implementation of the tensor interface.
Definition: Tensor.h:37
+ +
virtual PaddingSize padding() const =0
Padding of tensor.
+
1 channel, 1 U8 per channel
+
Data set containing large tensor shapes.
+
std::unique_ptr< TensorLibrary > library
Definition: main.cpp:50
+ +
void run() overridefinal
Run the kernels contained in the function.
+ +
Accessor implementation for Tensor objects.
Definition: NEAccessor.h:38
+
void validate(const arm_compute::ValidRegion &region, const arm_compute::ValidRegion &reference)
Validate valid regions.
Definition: Validation.cpp:166
+ +
Definition: Datasets.h:47
+ + + + + + +
DataType
Available data types.
Definition: Types.h:60
+
ConvertPolicy
Policy to handle overflow.
Definition: Types.h:204
+
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor&#39;s metadata.
+
+
+ + + + diff --git a/documentation/_array_8h.xhtml b/documentation/_array_8h.xhtml index 34320b6..197a781 100644 --- a/documentation/_array_8h.xhtml +++ b/documentation/_array_8h.xhtml @@ -6,7 +6,7 @@ -ARM Compute Library: arm_compute/runtime/Array.h File Reference +Compute Library: arm_compute/runtime/Array.h File Reference @@ -39,8 +39,8 @@ -
ARM Compute Library -  17.05 +
Compute Library +  v17.06
@@ -55,6 +55,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
-
#include "arm_compute/core/Helpers.h"
+
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Types.h"
#include <memory>

Go to the source code of this file.

@@ -144,7 +145,7 @@ Typedefs   using Coordinates2DArray = Array< Coordinates2D >   -using DetectionWindowArray = Array< DetectionWindow > +using DetectionWindowArray = Array< DetectionWindow >   using Size2DArray = Array< Size2D >   @@ -167,7 +168,7 @@ Typedefs