From: Kaizen Date: Thu, 12 Oct 2017 13:26:51 +0000 (+0100) Subject: arm_compute v17.10 X-Git-Tag: submit/tizen/20180223.063230~7 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bf8b01dfbfdca124673ade33c5eac8f3748d7abd;p=platform%2Fupstream%2Farmcl.git arm_compute v17.10 Change-Id: If1489af40eccd0219ede8946577afbf04db31b29 --- diff --git a/README.md b/README.md index f5693ff..ecd1ca1 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ Related projects: Documentation available here: +- [v17.10](https://arm-software.github.io/ComputeLibrary/v17.10/) - [v17.09](https://arm-software.github.io/ComputeLibrary/v17.09/) - [v17.06](https://arm-software.github.io/ComputeLibrary/v17.06/) - [v17.05](https://arm-software.github.io/ComputeLibrary/v17.05/) @@ -17,6 +18,7 @@ Documentation available here: Binaries available here: +- [v17.10](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.10/arm_compute-v17.10-bin.tar.gz) - [v17.09](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.09/arm_compute-v17.09-bin.tar.gz) - [v17.06](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.06/arm_compute-v17.06-bin.tar.gz) - [v17.05](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.05/arm_compute-v17.05-bin.tar.gz) diff --git a/SConscript b/SConscript index c98c391..b6305a0 100644 --- a/SConscript +++ b/SConscript @@ -24,18 +24,18 @@ import os.path import re import subprocess -VERSION = "v17.09" -SONAME_VERSION="4.0.0" +VERSION = "v17.10" +SONAME_VERSION="5.0.0" Import('env') Import('vars') -def build_library(name, sources, static=False): +def build_library(name, sources, static=False, libs=[]): if static: - obj = arm_compute_env.StaticLibrary(name, source=sources) + obj = arm_compute_env.StaticLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs) else: if env['set_soname']: - obj = arm_compute_env.SharedLibrary(name, source=sources, SHLIBVERSION = SONAME_VERSION) + obj = 
arm_compute_env.SharedLibrary(name, source=sources, SHLIBVERSION = SONAME_VERSION, LIBS = arm_compute_env["LIBS"] + libs) symlinks = [] # Manually delete symlinks or SCons will get confused: @@ -51,7 +51,7 @@ def build_library(name, sources, static=False): Default(clean) Depends(obj, clean) else: - obj = arm_compute_env.SharedLibrary(name, source=sources) + obj = arm_compute_env.SharedLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs) Default(obj) return obj @@ -191,11 +191,12 @@ if env['os'] != 'bare_metal' and not env['standalone']: shared_runtime_objects = [arm_compute_env.SharedObject(f) for f in runtime_files] static_runtime_objects = [arm_compute_env.StaticObject(f) for f in runtime_files] -arm_compute_a = build_library('arm_compute-static', static_core_objects + static_runtime_objects, static=True) +arm_compute_a = build_library('arm_compute-static', static_runtime_objects, static=True, libs = [ arm_compute_core_a ]) Export('arm_compute_a') if env['os'] != 'bare_metal' and not env['standalone']: - arm_compute_so = build_library('arm_compute', shared_core_objects + shared_runtime_objects, static=False) + arm_compute_so = build_library('arm_compute', shared_runtime_objects, static=False, libs = [ "arm_compute_core" ]) + Depends(arm_compute_so, arm_compute_core_so) Export('arm_compute_so') if env['neon'] and env['opencl']: @@ -208,10 +209,11 @@ if env['neon'] and env['opencl']: shared_graph_objects = [arm_compute_env.SharedObject(f) for f in graph_files] static_graph_objects = [arm_compute_env.StaticObject(f) for f in graph_files] - arm_compute_graph_a = build_library('arm_compute_graph-static', static_core_objects + static_runtime_objects + static_graph_objects, static=True) + arm_compute_graph_a = build_library('arm_compute_graph-static', static_graph_objects, static=True, libs = [ arm_compute_a ]) Export('arm_compute_graph_a') - arm_compute_graph_so = build_library('arm_compute_graph', shared_core_objects + shared_runtime_objects + 
shared_graph_objects, static=False) + arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core" ]) + Depends( arm_compute_graph_so, arm_compute_so) Export('arm_compute_graph_so') graph_alias = arm_compute_env.Alias("arm_compute_graph", [arm_compute_graph_a, arm_compute_graph_so]) diff --git a/SConstruct b/SConstruct index 50370d3..4428a09 100644 --- a/SConstruct +++ b/SConstruct @@ -56,6 +56,7 @@ vars.AddVariables( ) env = Environment(platform="posix", variables=vars, ENV = os.environ) +env.Append(LIBPATH = ["#build/%s" % env['build_dir']]) SConsignFile('build/.%s' % env['build_dir']) @@ -107,7 +108,7 @@ if env['arch'] == 'armv7a': prefix = "arm-linux-gnueabihf-" env.Append(CXXFLAGS = ['-mfloat-abi=hard']) elif env['os'] == 'bare_metal': - prefix = "arm-none-eabi-" + prefix = "arm-eabi-" env.Append(CXXFLAGS = ['-mfloat-abi=hard']) elif env['os'] == 'android': prefix = "arm-linux-androideabi-" @@ -118,7 +119,7 @@ elif env['arch'] == 'arm64-v8a': if env['os'] == 'linux': prefix = "aarch64-linux-gnu-" elif env['os'] == 'bare_metal': - prefix = "aarch64-none-elf-" + prefix = "aarch64-elf-" elif env['os'] == 'android': prefix = "aarch64-linux-android-" elif env['arch'] == 'arm64-v8.2-a': @@ -216,7 +217,7 @@ SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate if env['opencl']: SConscript("./opencl-1.2-stubs/SConscript", variant_dir="build/%s/opencl-1.2-stubs" % env['build_dir'], duplicate=0) -if env['examples']: +if env['examples'] and env['os'] != 'bare_metal': SConscript('./examples/SConscript', variant_dir='#build/%s/examples' % env['build_dir'], duplicate=0) if env['os'] != 'bare_metal': diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h index 4d4565d..fc131cd 100644 --- a/arm_compute/core/CL/CLKernelLibrary.h +++ b/arm_compute/core/CL/CLKernelLibrary.h @@ -162,11 +162,9 @@ public: */ void init(std::string kernel_path = 
".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault()) { - _kernel_path = std::move(kernel_path); - _context = std::move(context); - _device = std::move(device); - _max_workgroup_size = 0; - max_local_workgroup_size(); + _kernel_path = std::move(kernel_path); + _context = std::move(context); + _device = std::move(device); } /** Sets the path that the kernels reside in. * @@ -208,20 +206,15 @@ public: { _device = cl_devices[0]; } - - _max_workgroup_size = 0; - max_local_workgroup_size(); - }; + } /** Sets the CL device for which the programs are created. * * @param[in] device A CL device. */ void set_device(cl::Device device) { - _device = std::move(device); - _max_workgroup_size = 0; - max_local_workgroup_size(); - }; + _device = std::move(device); + } /** Creates a kernel from the kernel library. * * @param[in] kernel_name Kernel name. @@ -238,15 +231,14 @@ public: * */ void load_binary(); - /** Find the maximum number of local work items in a workgroup can be supported by the device + /** Find the maximum number of local work items in a workgroup can be supported for the kernel. * */ - size_t max_local_workgroup_size(); - - /** Return the default NDRange that is suitable for the device. + size_t max_local_workgroup_size(const cl::Kernel &kernel) const; + /** Return the default NDRange for the device. * */ - cl::NDRange default_ndrange(); + cl::NDRange default_ndrange() const; private: /** Load program and its dependencies. @@ -270,7 +262,6 @@ private: static const std::map _kernel_program_map; /**< Map that associates kernel names with programs. */ static const std::map _program_source_map; /**< Contains sources for all programs. Used for compile-time kernel inclusion. 
>*/ - size_t _max_workgroup_size; /** Maximum local workgroup size supported on the device */ }; } #endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */ diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index d118d13..9119940 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -175,6 +175,12 @@ public: */ GPUTarget get_target() const; + /** Get the maximum workgroup size for the device the CLKernelLibrary uses. + * + * @return The maximum workgroup size value. + */ + size_t get_max_workgroup_size(); + private: /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. * @@ -208,10 +214,11 @@ private: unsigned int num_arguments_per_tensor() const; protected: - cl::Kernel _kernel; /**< OpenCL kernel to run */ - cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ - GPUTarget _target; /**< The targeted GPU */ - std::string _config_id; /**< Configuration ID */ + cl::Kernel _kernel; /**< OpenCL kernel to run */ + cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ + GPUTarget _target; /**< The targeted GPU */ + std::string _config_id; /**< Configuration ID */ + size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */ }; /** Add the kernel to the command queue with the given window. @@ -223,7 +230,7 @@ protected: * @param[in,out] queue OpenCL command queue. * @param[in] kernel Kernel to enqueue * @param[in] window Window the kernel has to process. - * @param[in] lws_hint Local workgroup size requested, by default (128,1) + * @param[in] lws_hint Local workgroup size requested, by default (128,1). * * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. 
*/ diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index 897e936..6780e23 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -83,6 +83,8 @@ public: using clGetDeviceInfo_func = cl_int (*)(cl_device_id, cl_device_info, size_t, void *, size_t *); using clGetDeviceIDs_func = cl_int (*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *); using clRetainEvent_func = cl_int (*)(cl_event); + using clGetPlatformIDs_func = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *); + using clGetKernelWorkGroupInfo_func = cl_int (*)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *); clBuildProgram_func clBuildProgram = nullptr; clEnqueueNDRangeKernel_func clEnqueueNDRangeKernel = nullptr; @@ -113,6 +115,8 @@ public: clGetDeviceInfo_func clGetDeviceInfo = nullptr; clGetDeviceIDs_func clGetDeviceIDs = nullptr; clRetainEvent_func clRetainEvent = nullptr; + clGetPlatformIDs_func clGetPlatformIDs = nullptr; + clGetKernelWorkGroupInfo_func clGetKernelWorkGroupInfo = nullptr; private: std::pair _loaded{ false, false }; diff --git a/arm_compute/core/Logger.h b/arm_compute/core/Logger.h new file mode 100644 index 0000000..0848479 --- /dev/null +++ b/arm_compute/core/Logger.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_LOGGER_H__ +#define __ARM_COMPUTE_LOGGER_H__ + +#include +#include + +#ifdef ARM_COMPUTE_DEBUG_ENABLED +#define ARM_COMPUTE_LOG(x) (arm_compute::Logger::get().log_info() << x) +#else /* ARM_COMPUTE_DEBUG_ENABLED */ +#define ARM_COMPUTE_LOG(...) +#endif /* ARM_COMPUTE_DEBUG_ENABLED */ + +namespace arm_compute +{ +/**< Verbosity of the logger */ +enum class LoggerVerbosity +{ + NONE, /**< No info */ + INFO /**< Log info */ +}; + +/** Logger singleton class */ +class Logger +{ +public: + static Logger &get(); + void set_logger(std::ostream &ostream, LoggerVerbosity verbosity); + std::ostream &log_info(); + +private: + /** Default constructor */ + Logger(); + /** Allow instances of this class to be moved */ + Logger(Logger &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + Logger(const Logger &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + Logger &operator=(const Logger &) = delete; + /** Allow instances of this class to be moved */ + Logger &operator=(Logger &&) = default; + + std::ostream *_ostream; + std::ostream _nullstream; + LoggerVerbosity _verbosity; +}; +} // arm_compute +#endif /* __ARM_COMPUTE_LOGGER_H__ */ \ No newline at end of file diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index d401b0e..966313d 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ 
b/arm_compute/core/NEON/NEFixedPoint.inl @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include #include namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h index 6cd351f..f65a6c2 100644 --- a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h +++ b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h @@ -109,7 +109,7 @@ private: * * @return Values A11, A12, A22 */ - std::tuple compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int *bilinear_ix, int *bilinear_iy); + std::tuple compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy); /** Compute the vector A^T * b, i.e. -sum(I_d * I_t) for d in {x,y} * * @param[in] old_keypoint Old keypoint for which gradient is computed @@ -119,7 +119,7 @@ private: * * @return Values b1, b2 */ - std::pair compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int *bilinear_ix, const int *bilinear_iy); + std::pair compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy); const ITensor *_input_old; const ITensor *_input_new; diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index 2a0ecf8..9d7c751 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -48,6 +48,8 @@ public: ~NEPoolingLayerKernel() = default; /** Set the input and output tensors. * + * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only + * * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. 
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. @@ -123,6 +125,13 @@ private: */ template void pooling7_f32(const Window &window_input, const Window &window); + /** Function to perform NxN pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void poolingN_f32(const Window &window_input, const Window &window); /** Common signature for all the specialised Pooling functions * * @param[in] window_input Input region on which to execute the kernel. diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index 6340556..ce3726b 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -84,6 +84,25 @@ public: { value.s32 = v; } + + /** Initialize the union with a U64 pixel value + * + * @param[in] v U64 value. + */ + PixelValue(uint64_t v) + : PixelValue() + { + value.u64 = v; + } + /** Initialize the union with a S64 pixel value + * + * @param[in] v S64 value. + */ + PixelValue(int64_t v) + : PixelValue() + { + value.s64 = v; + } /** Initialize the union with a F16 pixel value * * @param[in] v F16 value. @@ -102,6 +121,15 @@ public: { value.f32 = v; } + /** Initialize the union with a F64 pixel value + * + * @param[in] v F64 value. + */ + PixelValue(double v) + : PixelValue() + { + value.f64 = v; + } /** Union which describes the value of a pixel for any image format. 
* Use the field corresponding to the image format */ @@ -110,6 +138,7 @@ public: uint8_t rgb[3]; /**< 3 channels: RGB888 */ uint8_t yuv[3]; /**< 3 channels: Any YUV format */ uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + double f64; /**< Single channel double */ float f32; /**< Single channel float 32 */ half f16; /**< Single channel F16 */ uint8_t u8; /**< Single channel U8 */ @@ -118,6 +147,8 @@ public: int16_t s16; /**< Single channel S16 */ uint32_t u32; /**< Single channel U32 */ int32_t s32; /**< Single channel S32 */ + uint64_t u64; /**< Single channel U64 */ + int64_t s64; /**< Single channel S64 */ } value; /** Interpret the pixel value as a U8 * @@ -167,6 +198,22 @@ public: { v = value.s32; } + /** Interpret the pixel value as a U64 + * + * @param[out] v Returned value + */ + void get(uint64_t &v) const + { + v = value.u64; + } + /** Interpret the pixel value as a S64 + * + * @param[out] v Returned value + */ + void get(int64_t &v) const + { + v = value.s64; + } /** Interpret the pixel value as a F16 * * @param[out] v Returned value @@ -183,6 +230,25 @@ public: { v = value.f32; } + /** Interpret the pixel value as a double + * + * @param[out] v Returned value + */ + void get(double &v) const + { + v = value.f64; + } + /** Get the pixel value + * + * @return Pixel value + */ + template + T get() const + { + T val; + get(val); + return val; + } }; } #endif /* __ARM_COMPUTE_PIXELVALUE_H__ */ diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h index e2532fd..54fb66a 100644 --- a/arm_compute/core/SubTensorInfo.h +++ b/arm_compute/core/SubTensorInfo.h @@ -27,6 +27,7 @@ #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Helpers.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/TensorShape.h" diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h index 8d15c50..3b395e7 100644 --- 
a/arm_compute/core/TensorShape.h +++ b/arm_compute/core/TensorShape.h @@ -164,7 +164,7 @@ private: /** Remove trailing dimensions of size 1 from the reported number of dimensions. */ void apply_dimension_correction() { - for(int i = static_cast(_num_dimensions) - 1; i >= 0; --i) + for(int i = static_cast(_num_dimensions) - 1; i > 0; --i) { if(_id[i] == 1) { diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index ab5d110..06d6746 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -35,6 +35,7 @@ #include #include #include +#include namespace arm_compute { @@ -419,6 +420,37 @@ inline uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matri return std::max(1, std::abs(std::accumulate(matrix, matrix + size, 0))); } +/** Calculate the output shapes of the depth concatenate function. + * + * @param[in] inputs_vector The vector that stores all the pointers to input. + * + * @return the output shape + */ +template +TensorShape calculate_depth_concatenate_shape(const std::vector &inputs_vector) +{ + TensorShape out_shape = inputs_vector[0]->info()->tensor_shape(); + + size_t max_x = 0; + size_t max_y = 0; + size_t depth = 0; + + for(const auto &tensor : inputs_vector) + { + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + const TensorShape shape = tensor->info()->tensor_shape(); + max_x = std::max(shape.x(), max_x); + max_y = std::max(shape.y(), max_y); + depth += shape.z(); + } + + out_shape.set(0, max_x); + out_shape.set(1, max_y); + out_shape.set(2, depth); + + return out_shape; +} + /** Calculate accurary required by the horizontal and vertical convolution computations * * @param[in] conv_col Pointer to the vertical vector of the separated convolution filter diff --git a/arm_compute/graph/Graph.h b/arm_compute/graph/Graph.h index 3c263c2..9d06f44 100644 --- a/arm_compute/graph/Graph.h +++ b/arm_compute/graph/Graph.h @@ -65,25 +65,17 @@ public: * @param[in] tensor Tensor to add */ void add_tensor(std::unique_ptr tensor); - 
/** Sets an execution hint to the graph - * - * @note Hint is propagated to the following node and as per name - * its just a hint/preference to be considered by the graph executor - * - * @param[in] hint execution hint - */ - void set_hint(Hint hint); /** Manually sets the output of the current node * * @param[in] tmp Output info to set */ void set_temp(TensorInfo &&tmp); - /** Sets whether to enable information print out + /** Returns the graph hints that are currently used * - * @param[in] is_enabled Set to true if need info printed out + * @return Graph hints */ - void set_info_enablement(bool is_enabled); + GraphHints &hints(); private: class Private; @@ -106,14 +98,22 @@ Graph &operator<<(Graph &graph, TensorInfo &&info); * @return Updated graph */ Graph &operator<<(Graph &graph, Tensor &&tensor); -/** Overloaded stream operator to provide an execution hint to the graph +/** Overloaded stream operator to provide a target hint to the graph + * + * @param[in, out] graph Graph to provide the hint to + * @param[in] target_hint Target hint to be considered + * + * @return Updated graph + */ +Graph &operator<<(Graph &graph, TargetHint target_hint); +/** Overloaded stream operator to provide a convolution method hint to the graph * - * @param[in, out] graph Graph to provide the hint to - * @param[in] hint Execution hint to be considered + * @param[in, out] graph Graph to provide the hint to + * @param[in] conv_method_hint Convolution method hint to be considered * * @return Updated graph */ -Graph &operator<<(Graph &graph, Hint hint); +Graph &operator<<(Graph &graph, ConvolutionMethodHint conv_method_hint); /** Overloaded stream operator to add a node to the graph * * @param[in, out] graph Graph to add the tensor diff --git a/arm_compute/graph/GraphContext.h b/arm_compute/graph/GraphContext.h new file mode 100644 index 0000000..98bc8c0 --- /dev/null +++ b/arm_compute/graph/GraphContext.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GRAPH_CONTEXT_H__ +#define __ARM_COMPUTE_GRAPH_CONTEXT_H__ + +#include "arm_compute/graph/Types.h" + +namespace arm_compute +{ +namespace graph +{ +/** Hints that can be passed to the graph to expose parameterization */ +class GraphHints +{ +public: + /** Default Constructor */ + GraphHints(TargetHint target_hint = TargetHint::DONT_CARE, + ConvolutionMethodHint conv_method_hint = ConvolutionMethodHint::GEMM); + /** Sets target execution hint + * + * @param target_hint Target execution hint + */ + void set_target_hint(TargetHint target_hint); + /** Sets convolution method to use + * + * @param convolution_method Convolution method to use + */ + void set_convolution_method_hint(ConvolutionMethodHint convolution_method); + /** Returns target execution hint + * + * @return target execution hint + */ + TargetHint target_hint() const; + /** Returns convolution method hint + * + * @return convolution method hint + */ + ConvolutionMethodHint convolution_method_hint() const; + +private: + TargetHint _target_hint; /**< Target execution hint */ + ConvolutionMethodHint _convolution_method_hint; /**< Convolution method hint */ +}; + +/** Graph context */ +class GraphContext +{ +public: + /** Default Constructor */ + GraphContext(); + /** Returns graph hints + * + * @return Graph hints + */ + GraphHints &hints(); + /** Returns graph hints + * + * @return Graph hints + */ + const GraphHints &hints() const; + +private: + GraphHints _hints; /**< Graph hints */ +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_CONTEXT_H__ */ diff --git a/arm_compute/graph/INode.h b/arm_compute/graph/INode.h index 13b5d05..1b22bdf 100644 --- a/arm_compute/graph/INode.h +++ b/arm_compute/graph/INode.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_GRAPH_INODE_H__ #define __ARM_COMPUTE_GRAPH_INODE_H__ +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -41,37 +42,33 @@ 
public: virtual ~INode() = default; /** Interface to be implemented that instantiates the node * - * @param[in] hint Hint to where the node should be executed + * @param[in] ctx Graph context to be used * @param[in] input Input tensor of the node * @param[in] output Output tensor of the node */ - virtual std::unique_ptr instantiate_node(Hint hint, ITensor *input, ITensor *output) = 0; - /** Override the existing hint + virtual std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) = 0; + /** Override the existing target hint * * @note If the input is DONT_CARE then the method has to pick a technology, * else it can accept the hint or override it (But not with DONT_CARE) * - * @param[in] hint Hint to be considered + * @param[in] target_hint Target hint to be considered * - * @return The updated hint + * @return The updated target hint */ - Hint override_hint(Hint hint) const; - - virtual void print_info() = 0; + TargetHint override_target_hint(TargetHint target_hint) const; protected: - /** Interface to be implement that override the hint + /** Interface to be implemented that overrides the hints * - * @param[in] hint Hint to be considered + * @param[in] hints Hints to be considered * - * @return The updated hint + * @return The updated hints */ - virtual Hint node_override_hint(Hint hint) const; + virtual GraphHints node_override_hints(GraphHints hints) const; protected: - Hint _hint{ Hint::DONT_CARE }; - ITensor *_input{ nullptr }; - ITensor *_output{ nullptr }; + TargetHint _target_hint{ TargetHint::DONT_CARE }; }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/Nodes.h b/arm_compute/graph/Nodes.h index 5e995ac..548deab 100644 --- a/arm_compute/graph/Nodes.h +++ b/arm_compute/graph/Nodes.h @@ -25,8 +25,12 @@ #define __ARM_COMPUTE_GRAPH_NODES_H__ #include "arm_compute/graph/nodes/ActivationLayer.h" +#include "arm_compute/graph/nodes/BatchNormalizationLayer.h" #include 
"arm_compute/graph/nodes/ConvolutionLayer.h" +#include "arm_compute/graph/nodes/FloorLayer.h" #include "arm_compute/graph/nodes/FullyConnectedLayer.h" +#include "arm_compute/graph/nodes/L2NormalizeLayer.h" +#include "arm_compute/graph/nodes/NormalizationLayer.h" #include "arm_compute/graph/nodes/PoolingLayer.h" #include "arm_compute/graph/nodes/SoftmaxLayer.h" diff --git a/arm_compute/graph/SubTensor.h b/arm_compute/graph/SubTensor.h new file mode 100644 index 0000000..ace93d2 --- /dev/null +++ b/arm_compute/graph/SubTensor.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GRAPH_SUBTENSOR_H__ +#define __ARM_COMPUTE_GRAPH_SUBTENSOR_H__ + +#include "arm_compute/graph/ITensorAccessor.h" +#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/Types.h" +#include "support/ToolchainSupport.h" + +#include + +namespace arm_compute +{ +namespace graph +{ +/** SubTensor class */ +class SubTensor final +{ +public: + /** Default Constructor */ + SubTensor(); + /** Constructor + * + * @param[in] parent Parent to create sub-tensor from + * @param[in] tensor_shape Sub-tensor shape + * @param[in] coords Starting coordinates of the sub-tensor in the parent tensor + */ + SubTensor(Tensor &parent, TensorShape tensor_shape, Coordinates coords); + /** Constructor + * + * @param[in] parent Parent to create sub-tensor from + * @param[in] tensor_shape Sub-tensor shape + * @param[in] coords Starting coordinates of the sub-tensor in the parent tensor + * @param[in] target Execution target + */ + SubTensor(ITensor *parent, TensorShape tensor_shape, Coordinates coords, TargetHint target); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + SubTensor(const SubTensor &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + SubTensor &operator=(const SubTensor &) = delete; + /** Allow instances of this class to be moved */ + SubTensor(SubTensor &&) = default; + /** Allow instances of this class to be moved */ + SubTensor &operator=(SubTensor &&) = default; + /** Default Destructor */ + ~SubTensor() = default; + + /** Sets the given TensorInfo to the tensor + * + * @param[in] info TensorInfo to set + */ + void set_info(SubTensorInfo &&info); + /** Returns tensor's TensorInfo + * + * @return TensorInfo of the tensor + */ + const SubTensorInfo &info() const; + /** Returns a pointer to the internal tensor + * + * @return Tensor + */ + ITensor *tensor(); + /** Return the target that this tensor is pinned on + * + * @return Target of the 
tensor + */ + TargetHint target() const; + +private: + /** Instantiates a sub-tensor */ + void instantiate_subtensor(); + +private: + TargetHint _target; /**< Target that this tensor is pinned on */ + Coordinates _coords; /**< SubTensor Coordinates */ + SubTensorInfo _info; /**< SubTensor metadata */ + ITensor *_parent; /**< Parent tensor */ + std::unique_ptr _subtensor; /**< SubTensor */ +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_SUBTENSOR_H__ */ diff --git a/arm_compute/graph/Tensor.h b/arm_compute/graph/Tensor.h index 0e823ff..9fdd56d 100644 --- a/arm_compute/graph/Tensor.h +++ b/arm_compute/graph/Tensor.h @@ -49,7 +49,7 @@ public: */ template Tensor(std::unique_ptr accessor) - : _target(Hint::DONT_CARE), _info(), _accessor(std::move(accessor)), _tensor(nullptr) + : _target(TargetHint::DONT_CARE), _info(), _accessor(std::move(accessor)), _tensor(nullptr) { } /** Constructor @@ -58,7 +58,7 @@ public: */ template Tensor(AccessorType &&accessor) - : _target(Hint::DONT_CARE), _info(), _accessor(arm_compute::support::cpp14::make_unique(std::forward(accessor))), _tensor(nullptr) + : _target(TargetHint::DONT_CARE), _info(), _accessor(arm_compute::support::cpp14::make_unique(std::forward(accessor))), _tensor(nullptr) { } /** Constructor @@ -68,7 +68,7 @@ public: */ template Tensor(TensorInfo &&info, AccessorType &&accessor) - : _target(Hint::DONT_CARE), _info(info), _accessor(arm_compute::support::cpp14::make_unique(std::forward(accessor))), _tensor(nullptr) + : _target(TargetHint::DONT_CARE), _info(info), _accessor(arm_compute::support::cpp14::make_unique(std::forward(accessor))), _tensor(nullptr) { } /** Default Destructor */ @@ -95,7 +95,7 @@ public: * * @return */ - ITensor *set_target(Hint target); + ITensor *set_target(TargetHint target); /** Returns tensor's TensorInfo * * @return TensorInfo of the tensor @@ -114,10 +114,10 @@ public: * * @return Target of the tensor */ - Hint target() const; + TargetHint target() const; 
private: - Hint _target; /**< Target that this tensor is pinned on */ + TargetHint _target; /**< Target that this tensor is pinned on */ TensorInfo _info; /**< Tensor metadata */ std::unique_ptr _accessor; /**< Tensor Accessor */ std::unique_ptr _tensor; /**< Tensor */ diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index 0b9596d..e48ff84 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -25,30 +25,41 @@ #define __ARM_COMPUTE_GRAPH_TYPES_H__ #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/SubTensorInfo.h" #include "arm_compute/core/TensorInfo.h" namespace arm_compute { namespace graph { -using arm_compute::ActivationLayerInfo; using arm_compute::ITensor; using arm_compute::TensorInfo; +using arm_compute::SubTensorInfo; using arm_compute::DataType; +using arm_compute::Coordinates; using arm_compute::TensorShape; using arm_compute::PadStrideInfo; using arm_compute::WeightsInfo; +using arm_compute::ActivationLayerInfo; +using arm_compute::NormType; +using arm_compute::NormalizationLayerInfo; using arm_compute::PoolingLayerInfo; using arm_compute::PoolingType; /**< Execution hint to the graph executor */ -enum class Hint +enum class TargetHint { DONT_CARE, /**< Run node in any device */ OPENCL, /**< Run node on an OpenCL capable device (GPU) */ NEON /**< Run node on a NEON capable device */ }; +/**< Convolution method hint to the graph executor */ +enum class ConvolutionMethodHint +{ + GEMM, /**< Convolution using GEMM */ + DIRECT /**< Direct convolution */ +}; } // namespace graph } // namespace arm_compute #endif /*__ARM_COMPUTE_GRAPH_TYPES_H__*/ diff --git a/arm_compute/graph/nodes/ActivationLayer.h b/arm_compute/graph/nodes/ActivationLayer.h index c23674e..efe8112 100644 --- a/arm_compute/graph/nodes/ActivationLayer.h +++ b/arm_compute/graph/nodes/ActivationLayer.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_GRAPH_ACTIVATION_LAYER_H__ #define __ARM_COMPUTE_GRAPH_ACTIVATION_LAYER_H__ +#include 
"arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" @@ -33,7 +34,7 @@ namespace arm_compute namespace graph { /** Activation Layer node */ -class ActivationLayer : public INode +class ActivationLayer final : public INode { public: /** Default Constructor @@ -43,8 +44,7 @@ public: ActivationLayer(const ActivationLayerInfo activation_info); // Inherited methods overriden: - std::unique_ptr instantiate_node(Hint hint, ITensor *input, ITensor *output) override; - void print_info() override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; private: const ActivationLayerInfo _activation_info; /**< Activation layer info */ diff --git a/arm_compute/graph/nodes/BatchNormalizationLayer.h b/arm_compute/graph/nodes/BatchNormalizationLayer.h new file mode 100644 index 0000000..f01cac2 --- /dev/null +++ b/arm_compute/graph/nodes/BatchNormalizationLayer.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GRAPH_BATCHNORMALIZATION_LAYER_H__ +#define __ARM_COMPUTE_GRAPH_BATCHNORMALIZATION_LAYER_H__ + +#include "arm_compute/graph/GraphContext.h" +#include "arm_compute/graph/INode.h" +#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/Types.h" + +namespace arm_compute +{ +namespace graph +{ +/** BatchNormalization layer node */ +class BatchNormalizationLayer final : public INode +{ +public: + /** Default constructor + * + * @param[in] mean Mean values tensor + * @param[in] var Var values tensor + * @param[in] gamma Gamma values tensor + * @param[in] beta Beta values tensor + * @param[in] epsilon Epsilon value + */ + template + BatchNormalizationLayer(AccessorType &&mean, AccessorType &&var, AccessorType &&gamma, AccessorType &&beta, float epsilon) + : _mean(std::move(mean)), _var(std::move(var)), _gamma(std::move(gamma)), _beta(std::move(beta)), _epsilon(epsilon) + { + } + + // Inherited methods overriden: + std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + +private: + Tensor _mean; + Tensor _var; + Tensor _gamma; + Tensor _beta; + float _epsilon; +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_BATCHNORMALIZATION_LAYER_H__ */ diff --git a/arm_compute/graph/nodes/ConvolutionLayer.h b/arm_compute/graph/nodes/ConvolutionLayer.h index c0e257b..04ba3dd 100644 --- a/arm_compute/graph/nodes/ConvolutionLayer.h +++ b/arm_compute/graph/nodes/ConvolutionLayer.h @@ -24,16 +24,21 @@ #ifndef __ARM_COMPUTE_GRAPH_CONVOLUTION_LAYER_H__ #define __ARM_COMPUTE_GRAPH_CONVOLUTION_LAYER_H__ +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" +#include 
"arm_compute/graph/SubTensor.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include namespace arm_compute { namespace graph { /** Convolution layer node */ -class ConvolutionLayer : public INode +class ConvolutionLayer final : public INode { public: /** Default Constructor @@ -44,27 +49,70 @@ public: * @param[in] weights Weights of the convolution layer * @param[in] biases Bias of the convolution layer * @param[in] conv_info Convolution information - * @param[in] weights_info Weights information + * @param[in] num_groups (Optional) Number of groups, default = 1 + * @param[in] weights_info (Optional) Weights information */ template - ConvolutionLayer(unsigned int conv_width, unsigned int conv_height, unsigned int ofm, AccessorTypeWeights &&weights, - AccessorTypeBiases &&biases, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()) - : _conv_width(conv_width), _conv_height(conv_height), _ofm(ofm), _weights(std::move(weights)), _biases(std::move(biases)), _conv_info(conv_info), _weights_info(weights_info) + ConvolutionLayer(unsigned int conv_width, + unsigned int conv_height, + unsigned int ofm, + AccessorTypeWeights &&weights, + AccessorTypeBiases &&biases, + const PadStrideInfo conv_info, + unsigned int num_groups = 1, + const WeightsInfo weights_info = WeightsInfo()) + : _conv_width(conv_width), + _conv_height(conv_height), + _ofm(ofm), + _weights(std::move(weights)), + _biases(std::move(biases)), + _conv_info(std::move(conv_info)), + _num_groups(num_groups), + _weights_info(std::move(weights_info)), + _is(nullptr), + _os(nullptr), + _ws(nullptr), + _bs(nullptr) { } // Inherited methods overriden: - std::unique_ptr instantiate_node(Hint hint, ITensor *input, ITensor *output) override; - void print_info() override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + +private: + /** Instantiates a non-grouped 
convolution + * + * @param[in] input Input tensor + * @param[in] output Output tensor + * @param[in] conv_method_hint Hint that specifies which convolution layer method to use + * + * @return Convolution function + */ + std::unique_ptr instantiate_convolution(ITensor *input, ITensor *output, ConvolutionMethodHint conv_method_hint); + /** Instantiates a grouped convolution + * + * @param[in] input Input tensor + * @param[in] output Output tensor + * @param[in] conv_method_hint Hint that specifies which convolution layer method to use + * + * @return Grouped Convolution function + */ + std::unique_ptr instantiate_grouped_convolution(ITensor *input, ITensor *output, ConvolutionMethodHint conv_method_hint); private: - unsigned int _conv_width; /**< Convolution width */ - unsigned int _conv_height; /**< Convolution height */ - unsigned int _ofm; /**< Output feature maps */ - Tensor _weights; /**< Weights tensor */ - Tensor _biases; /**< Biases tensor */ - const PadStrideInfo &_conv_info; /**< Convolution layer information */ - const WeightsInfo &_weights_info; /**< Convolution layer weights information */ + unsigned int _conv_width; /**< Convolution width */ + unsigned int _conv_height; /**< Convolution height */ + unsigned int _ofm; /**< Output feature maps */ + Tensor _weights; /**< Weights tensor */ + Tensor _biases; /**< Biases tensor */ + const PadStrideInfo _conv_info; /**< Convolution layer information */ + unsigned int _num_groups; /**< Number of groups */ + const WeightsInfo _weights_info; /**< Convolution layer weights information */ + + std::unique_ptr _is; /**< Input tensor sub-tensors used for grouped convolution */ + std::unique_ptr _os; /**< Output tensor sub-tensors used for grouped convolution */ + std::unique_ptr _ws; /**< Weights tensor sub-tensors used for grouped convolution */ + std::unique_ptr _bs; /**< Biases tensor sub-tensors used for grouped convolution */ }; } // namespace graph } // namespace arm_compute diff --git 
a/arm_compute/graph/nodes/FloorLayer.h b/arm_compute/graph/nodes/FloorLayer.h new file mode 100644 index 0000000..40fde3b --- /dev/null +++ b/arm_compute/graph/nodes/FloorLayer.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GRAPH_FLOOR_LAYER_H__ +#define __ARM_COMPUTE_GRAPH_FLOOR_LAYER_H__ + +#include "arm_compute/graph/GraphContext.h" +#include "arm_compute/graph/INode.h" +#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/Types.h" +namespace arm_compute +{ +namespace graph +{ +/** Floor layer node */ +class FloorLayer : public INode +{ +public: + // Inherited methods overriden: + std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; +}; + +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_FLOOR_LAYER_H__ */ diff --git a/arm_compute/graph/nodes/FullyConnectedLayer.h b/arm_compute/graph/nodes/FullyConnectedLayer.h index 3e1fe23..d31e060 100644 --- a/arm_compute/graph/nodes/FullyConnectedLayer.h +++ b/arm_compute/graph/nodes/FullyConnectedLayer.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_GRAPH_FULLY_CONNECTED_LAYER_H__ #define __ARM_COMPUTE_GRAPH_FULLY_CONNECTED_LAYER_H__ +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" @@ -33,7 +34,7 @@ namespace arm_compute namespace graph { /** Fully connected layer node */ -class FullyConnectedLayer : public INode +class FullyConnectedLayer final : public INode { public: /** Default constructor @@ -49,8 +50,7 @@ public: } // Inherited methods overriden: - std::unique_ptr instantiate_node(Hint hint, ITensor *input, ITensor *output) override; - void print_info() override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; // Inherited methods overriden: private: diff --git a/arm_compute/graph/nodes/L2NormalizeLayer.h b/arm_compute/graph/nodes/L2NormalizeLayer.h new file mode 100644 index 0000000..ab333a2 --- /dev/null +++ b/arm_compute/graph/nodes/L2NormalizeLayer.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GRAPH_L2NORMALIZE_LAYER_H__ +#define __ARM_COMPUTE_GRAPH_L2NORMALIZE_LAYER_H__ + +#include "arm_compute/graph/GraphContext.h" +#include "arm_compute/graph/INode.h" +#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/Types.h" + +namespace arm_compute +{ +namespace graph +{ +/** L2Normalize layer node */ +class L2NormalizeLayer final : public INode +{ +public: + /** Default Constructor + * + * @param[in] axis Dimension along which to reduce. + * @param[in] epsilon Lower bound value for the normalization. 
+ */ + explicit L2NormalizeLayer(unsigned int axis, float epsilon) + : _axis(axis), _epsilon(epsilon) + { + } + + // Inherited methods overriden: + std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + +private: + unsigned int _axis; + float _epsilon; +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_L2NORMALIZE_LAYER_H__ */ diff --git a/arm_compute/graph/nodes/NormalizationLayer.h b/arm_compute/graph/nodes/NormalizationLayer.h new file mode 100644 index 0000000..02efd1c --- /dev/null +++ b/arm_compute/graph/nodes/NormalizationLayer.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GRAPH_NORMALIZATION_LAYER_H__ +#define __ARM_COMPUTE_GRAPH_NORMALIZATION_LAYER_H__ + +#include "arm_compute/graph/GraphContext.h" +#include "arm_compute/graph/INode.h" +#include "arm_compute/graph/Types.h" + +namespace arm_compute +{ +namespace graph +{ +/** Normalization layer node */ +class NormalizationLayer final : public INode +{ +public: + /** Default Constructor + * + * @param[in] norm_info Normalization layer information + */ + explicit NormalizationLayer(const NormalizationLayerInfo norm_info); + + // Inherited methods overriden: + std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + +private: + const NormalizationLayerInfo _norm_info; /**< Normalization layer information */ +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_NORMALIZATION_LAYER_H__ */ diff --git a/arm_compute/graph/nodes/PoolingLayer.h b/arm_compute/graph/nodes/PoolingLayer.h index 14e2c6d..87b15d0 100644 --- a/arm_compute/graph/nodes/PoolingLayer.h +++ b/arm_compute/graph/nodes/PoolingLayer.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_GRAPH_POOLING_LAYER_H__ #define __ARM_COMPUTE_GRAPH_POOLING_LAYER_H__ +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" @@ -33,7 +34,7 @@ namespace arm_compute namespace graph { /** Pooling layer node */ -class PoolingLayer : public INode +class PoolingLayer final : public INode { public: /** Default Constructor @@ -43,8 +44,7 @@ public: PoolingLayer(const PoolingLayerInfo pool_info); // Inherited methods overriden: - std::unique_ptr instantiate_node(Hint hint, ITensor *input, ITensor *output) override; - void print_info() override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; private: const PoolingLayerInfo _pool_info; /**< Pooling layer information */ diff --git 
a/arm_compute/graph/nodes/SoftmaxLayer.h b/arm_compute/graph/nodes/SoftmaxLayer.h index 1779ada..2e1bd98 100644 --- a/arm_compute/graph/nodes/SoftmaxLayer.h +++ b/arm_compute/graph/nodes/SoftmaxLayer.h @@ -24,10 +24,10 @@ #ifndef __ARM_COMPUTE_GRAPH_SOFTMAX_LAYER_H__ #define __ARM_COMPUTE_GRAPH_SOFTMAX_LAYER_H__ +#include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" - namespace arm_compute { namespace graph @@ -37,8 +37,7 @@ class SoftmaxLayer : public INode { public: // Inherited methods overriden: - std::unique_ptr instantiate_node(Hint hint, ITensor *input, ITensor *output) override; - void print_info() override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; }; } // namespace graph diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h index 11affeb..1a7befc 100644 --- a/arm_compute/runtime/CL/CLScheduler.h +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -32,6 +32,28 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTuner.h" +#if defined(ARM_COMPUTE_DEBUG_ENABLED) +namespace +{ +void printf_callback(const char *buffer, unsigned int len, size_t complete, void *user_data) +{ + printf("%.*s", len, buffer); +} + +// Create a cl_context with a printf_callback and user specified buffer size. +cl_context_properties properties[] = +{ + // Enable a printf callback function for this context. + CL_PRINTF_CALLBACK_ARM, reinterpret_cast(printf_callback), + // Request a minimum printf buffer size of 4MB for devices in the + // context that support this extension. 
+ CL_PRINTF_BUFFERSIZE_ARM, static_cast(0x100000), + CL_CONTEXT_PLATFORM, reinterpret_cast(cl::Platform::get()()), + 0 +}; +} +#endif /* defined(ARM_COMPUTE_DEBUG_ENABLED) */ + namespace arm_compute { class ICLKernel; @@ -60,6 +82,10 @@ public: */ void default_init(ICLTuner *cl_tuner = nullptr) { +#if defined(ARM_COMPUTE_DEBUG_ENABLED) + cl::Context::setDefault(cl::Context(CL_DEVICE_TYPE_DEFAULT, properties)); +#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) + CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault()); init(cl::Context::getDefault(), cl::CommandQueue::getDefault(), cl::Device::getDefault(), cl_tuner); } diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index 2aa52bf..40bff97 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -52,6 +52,7 @@ #include "arm_compute/runtime/NEON/functions/NEErode.h" #include "arm_compute/runtime/NEON/functions/NEFastCorners.h" #include "arm_compute/runtime/NEON/functions/NEFillBorder.h" +#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" #include "arm_compute/runtime/NEON/functions/NEFloor.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h new file mode 100644 index 0000000..e9c8e27 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEFLATTENLAYER_H__ +#define __ARM_COMPUTE_NEFLATTENLAYER_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute flatten. This function calls the following NEON kernel: +* +* -# @ref NEIm2ColKernel +* +*/ +class NEFlattenLayer : public INESimpleFunction +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: QS8/QS16/F16/F32 + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. 
Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); +}; +} // namespace arm_compute + +#endif /* __ARM_COMPUTE_NEFLATTENLAYER_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index 5c36e80..7b038aa 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -24,8 +24,10 @@ #ifndef __ARM_COMPUTE_NEPOOLINGLAYER_H__ #define __ARM_COMPUTE_NEPOOLINGLAYER_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute @@ -37,16 +39,28 @@ class ITensor; * -# @ref NEFillBorderKernel (executed if padding size is different from zero) * -# @ref NEPoolingLayerKernel */ -class NEPoolingLayer : public INESimpleFunction +class NEPoolingLayer : public IFunction { public: + /** Constructor */ + NEPoolingLayer(); /** Set the input and output tensors. * + * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only + * * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. 
*/ void configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run() override; + +private: + NEPoolingLayerKernel _pooling_layer_kernel; + NEFillBorderKernel _border_handler; + bool _is_global_pooling_layer; }; } #endif /* __ARM_COMPUTE_NEPOOLINGLAYER_H__ */ diff --git a/data/dumps/exp_Q8.1.in b/data/dumps/exp_Q8.1.in deleted file mode 100644 index c926031..0000000 Binary files a/data/dumps/exp_Q8.1.in and /dev/null differ diff --git a/data/dumps/exp_Q8.1.out b/data/dumps/exp_Q8.1.out deleted file mode 100644 index 4d56a55..0000000 Binary files a/data/dumps/exp_Q8.1.out and /dev/null differ diff --git a/data/dumps/exp_Q8.2.in b/data/dumps/exp_Q8.2.in deleted file mode 100644 index 36a8b9a..0000000 Binary files a/data/dumps/exp_Q8.2.in and /dev/null differ diff --git a/data/dumps/exp_Q8.2.out b/data/dumps/exp_Q8.2.out deleted file mode 100644 index faacf91..0000000 Binary files a/data/dumps/exp_Q8.2.out and /dev/null differ diff --git a/data/dumps/exp_Q8.3.in b/data/dumps/exp_Q8.3.in deleted file mode 100644 index 9e42df4..0000000 Binary files a/data/dumps/exp_Q8.3.in and /dev/null differ diff --git a/data/dumps/exp_Q8.3.out b/data/dumps/exp_Q8.3.out deleted file mode 100644 index d2ed5d5..0000000 Binary files a/data/dumps/exp_Q8.3.out and /dev/null differ diff --git a/data/dumps/exp_Q8.4.in b/data/dumps/exp_Q8.4.in deleted file mode 100644 index 71802df..0000000 Binary files a/data/dumps/exp_Q8.4.in and /dev/null differ diff --git a/data/dumps/exp_Q8.4.out b/data/dumps/exp_Q8.4.out deleted file mode 100644 index 598f8de..0000000 Binary files a/data/dumps/exp_Q8.4.out and /dev/null differ diff --git a/data/dumps/exp_Q8.5.out b/data/dumps/exp_Q8.5.out deleted file mode 100644 index 7154619..0000000 Binary files a/data/dumps/exp_Q8.5.out and /dev/null differ diff --git a/data/dumps/exp_Q8.7.out b/data/dumps/exp_Q8.7.out deleted file mode 100644 index f100ea7..0000000 Binary files 
a/data/dumps/exp_Q8.7.out and /dev/null differ diff --git a/data/dumps/minus_Q8.5.out b/data/dumps/minus_Q8.5.out deleted file mode 100644 index 06d7405..0000000 Binary files a/data/dumps/minus_Q8.5.out and /dev/null differ diff --git a/data/dumps/minus_Q8.6.out b/data/dumps/minus_Q8.6.out deleted file mode 100644 index 06d7405..0000000 Binary files a/data/dumps/minus_Q8.6.out and /dev/null differ diff --git a/data/dumps/minus_Q8.7.out b/data/dumps/minus_Q8.7.out deleted file mode 100644 index 06d7405..0000000 Binary files a/data/dumps/minus_Q8.7.out and /dev/null differ diff --git a/data/dumps/mul_Q8.1.out b/data/dumps/mul_Q8.1.out deleted file mode 100644 index d35af5e..0000000 Binary files a/data/dumps/mul_Q8.1.out and /dev/null differ diff --git a/data/dumps/mul_Q8.2.out b/data/dumps/mul_Q8.2.out deleted file mode 100644 index a840751..0000000 Binary files a/data/dumps/mul_Q8.2.out and /dev/null differ diff --git a/data/dumps/mul_Q8.3.out b/data/dumps/mul_Q8.3.out deleted file mode 100644 index a56bec2..0000000 Binary files a/data/dumps/mul_Q8.3.out and /dev/null differ diff --git a/data/dumps/mul_Q8.4.out b/data/dumps/mul_Q8.4.out deleted file mode 100644 index a9fcedc..0000000 Binary files a/data/dumps/mul_Q8.4.out and /dev/null differ diff --git a/data/dumps/mul_Q8.5.out b/data/dumps/mul_Q8.5.out deleted file mode 100644 index a530924..0000000 Binary files a/data/dumps/mul_Q8.5.out and /dev/null differ diff --git a/data/dumps/mul_Q8.1.in b/data/fixed_point/add_Q8.1.in.npy similarity index 89% rename from data/dumps/mul_Q8.1.in rename to data/fixed_point/add_Q8.1.in.npy index aa631d2..b3765a7 100644 Binary files a/data/dumps/mul_Q8.1.in and b/data/fixed_point/add_Q8.1.in.npy differ diff --git a/data/fixed_point/add_Q8.1.out.npy b/data/fixed_point/add_Q8.1.out.npy new file mode 100644 index 0000000..a16e1cc Binary files /dev/null and b/data/fixed_point/add_Q8.1.out.npy differ diff --git a/data/dumps/mul_Q8.2.in b/data/fixed_point/add_Q8.2.in.npy similarity 
index 92% rename from data/dumps/mul_Q8.2.in rename to data/fixed_point/add_Q8.2.in.npy index ca881ab..e215385 100644 Binary files a/data/dumps/mul_Q8.2.in and b/data/fixed_point/add_Q8.2.in.npy differ diff --git a/data/fixed_point/add_Q8.2.out.npy b/data/fixed_point/add_Q8.2.out.npy new file mode 100644 index 0000000..46ca8bf Binary files /dev/null and b/data/fixed_point/add_Q8.2.out.npy differ diff --git a/data/dumps/mul_Q8.3.in b/data/fixed_point/add_Q8.3.in.npy similarity index 89% rename from data/dumps/mul_Q8.3.in rename to data/fixed_point/add_Q8.3.in.npy index 082d665..23e80f1 100644 Binary files a/data/dumps/mul_Q8.3.in and b/data/fixed_point/add_Q8.3.in.npy differ diff --git a/data/fixed_point/add_Q8.3.out.npy b/data/fixed_point/add_Q8.3.out.npy new file mode 100644 index 0000000..535291c Binary files /dev/null and b/data/fixed_point/add_Q8.3.out.npy differ diff --git a/data/dumps/mul_Q8.4.in b/data/fixed_point/add_Q8.4.in.npy similarity index 92% rename from data/dumps/mul_Q8.4.in rename to data/fixed_point/add_Q8.4.in.npy index cd5035e..a7033d9 100644 Binary files a/data/dumps/mul_Q8.4.in and b/data/fixed_point/add_Q8.4.in.npy differ diff --git a/data/fixed_point/add_Q8.4.out.npy b/data/fixed_point/add_Q8.4.out.npy new file mode 100644 index 0000000..1008d68 Binary files /dev/null and b/data/fixed_point/add_Q8.4.out.npy differ diff --git a/data/dumps/mul_Q8.5.in b/data/fixed_point/add_Q8.5.in.npy similarity index 89% rename from data/dumps/mul_Q8.5.in rename to data/fixed_point/add_Q8.5.in.npy index bf93114..e4d554e 100644 Binary files a/data/dumps/mul_Q8.5.in and b/data/fixed_point/add_Q8.5.in.npy differ diff --git a/data/fixed_point/add_Q8.5.out.npy b/data/fixed_point/add_Q8.5.out.npy new file mode 100644 index 0000000..6d87784 Binary files /dev/null and b/data/fixed_point/add_Q8.5.out.npy differ diff --git a/data/dumps/exp_Q8.6.in b/data/fixed_point/add_Q8.6.in.npy similarity index 92% rename from data/dumps/exp_Q8.6.in rename to 
data/fixed_point/add_Q8.6.in.npy index 454a381..8acb0f5 100644 Binary files a/data/dumps/exp_Q8.6.in and b/data/fixed_point/add_Q8.6.in.npy differ diff --git a/data/fixed_point/add_Q8.6.out.npy b/data/fixed_point/add_Q8.6.out.npy new file mode 100644 index 0000000..b5b58e5 Binary files /dev/null and b/data/fixed_point/add_Q8.6.out.npy differ diff --git a/data/dumps/minus_Q8.7.in b/data/fixed_point/add_Q8.7.in.npy similarity index 89% rename from data/dumps/minus_Q8.7.in rename to data/fixed_point/add_Q8.7.in.npy index ce6282b..6a9bb9a 100644 Binary files a/data/dumps/minus_Q8.7.in and b/data/fixed_point/add_Q8.7.in.npy differ diff --git a/data/fixed_point/add_Q8.7.out.npy b/data/fixed_point/add_Q8.7.out.npy new file mode 100644 index 0000000..0022af6 Binary files /dev/null and b/data/fixed_point/add_Q8.7.out.npy differ diff --git a/data/fixed_point/exp_Q8.1.in.npy b/data/fixed_point/exp_Q8.1.in.npy new file mode 100644 index 0000000..84e00e7 Binary files /dev/null and b/data/fixed_point/exp_Q8.1.in.npy differ diff --git a/data/fixed_point/exp_Q8.1.out.npy b/data/fixed_point/exp_Q8.1.out.npy new file mode 100644 index 0000000..87e87a2 Binary files /dev/null and b/data/fixed_point/exp_Q8.1.out.npy differ diff --git a/data/fixed_point/exp_Q8.2.in.npy b/data/fixed_point/exp_Q8.2.in.npy new file mode 100644 index 0000000..be9365e Binary files /dev/null and b/data/fixed_point/exp_Q8.2.in.npy differ diff --git a/data/fixed_point/exp_Q8.2.out.npy b/data/fixed_point/exp_Q8.2.out.npy new file mode 100644 index 0000000..5b1f31a Binary files /dev/null and b/data/fixed_point/exp_Q8.2.out.npy differ diff --git a/data/fixed_point/exp_Q8.3.in.npy b/data/fixed_point/exp_Q8.3.in.npy new file mode 100644 index 0000000..d3fa5cd Binary files /dev/null and b/data/fixed_point/exp_Q8.3.in.npy differ diff --git a/data/fixed_point/exp_Q8.3.out.npy b/data/fixed_point/exp_Q8.3.out.npy new file mode 100644 index 0000000..dee269f Binary files /dev/null and b/data/fixed_point/exp_Q8.3.out.npy 
differ diff --git a/data/dumps/plus_Q8.4.in b/data/fixed_point/exp_Q8.4.in.npy similarity index 92% rename from data/dumps/plus_Q8.4.in rename to data/fixed_point/exp_Q8.4.in.npy index cd5035e..a7033d9 100644 Binary files a/data/dumps/plus_Q8.4.in and b/data/fixed_point/exp_Q8.4.in.npy differ diff --git a/data/fixed_point/exp_Q8.4.out.npy b/data/fixed_point/exp_Q8.4.out.npy new file mode 100644 index 0000000..d205c2b Binary files /dev/null and b/data/fixed_point/exp_Q8.4.out.npy differ diff --git a/data/dumps/plus_Q8.5.in b/data/fixed_point/exp_Q8.5.in.npy similarity index 89% rename from data/dumps/plus_Q8.5.in rename to data/fixed_point/exp_Q8.5.in.npy index bf93114..e4d554e 100644 Binary files a/data/dumps/plus_Q8.5.in and b/data/fixed_point/exp_Q8.5.in.npy differ diff --git a/data/fixed_point/exp_Q8.5.out.npy b/data/fixed_point/exp_Q8.5.out.npy new file mode 100644 index 0000000..2e3b9a4 Binary files /dev/null and b/data/fixed_point/exp_Q8.5.out.npy differ diff --git a/data/dumps/minus_Q8.6.in b/data/fixed_point/exp_Q8.6.in.npy similarity index 92% rename from data/dumps/minus_Q8.6.in rename to data/fixed_point/exp_Q8.6.in.npy index 454a381..8acb0f5 100644 Binary files a/data/dumps/minus_Q8.6.in and b/data/fixed_point/exp_Q8.6.in.npy differ diff --git a/data/dumps/exp_Q8.6.out b/data/fixed_point/exp_Q8.6.out.npy similarity index 57% rename from data/dumps/exp_Q8.6.out rename to data/fixed_point/exp_Q8.6.out.npy index 9b7982b..2a4acc3 100644 Binary files a/data/dumps/exp_Q8.6.out and b/data/fixed_point/exp_Q8.6.out.npy differ diff --git a/data/dumps/exp_Q8.7.in b/data/fixed_point/exp_Q8.7.in.npy similarity index 89% rename from data/dumps/exp_Q8.7.in rename to data/fixed_point/exp_Q8.7.in.npy index ce6282b..6a9bb9a 100644 Binary files a/data/dumps/exp_Q8.7.in and b/data/fixed_point/exp_Q8.7.in.npy differ diff --git a/data/fixed_point/exp_Q8.7.out.npy b/data/fixed_point/exp_Q8.7.out.npy new file mode 100644 index 0000000..2eb2688 Binary files /dev/null and 
b/data/fixed_point/exp_Q8.7.out.npy differ diff --git a/data/dumps/log_Q8.1.in b/data/fixed_point/inv_sqrt_Q8.1.in.npy similarity index 79% rename from data/dumps/log_Q8.1.in rename to data/fixed_point/inv_sqrt_Q8.1.in.npy index 1da8513..25c04df 100644 Binary files a/data/dumps/log_Q8.1.in and b/data/fixed_point/inv_sqrt_Q8.1.in.npy differ diff --git a/data/fixed_point/inv_sqrt_Q8.1.out.npy b/data/fixed_point/inv_sqrt_Q8.1.out.npy new file mode 100644 index 0000000..164d2ee Binary files /dev/null and b/data/fixed_point/inv_sqrt_Q8.1.out.npy differ diff --git a/data/dumps/log_Q8.2.in b/data/fixed_point/inv_sqrt_Q8.2.in.npy similarity index 76% rename from data/dumps/log_Q8.2.in rename to data/fixed_point/inv_sqrt_Q8.2.in.npy index a236c37..97f5dff 100644 Binary files a/data/dumps/log_Q8.2.in and b/data/fixed_point/inv_sqrt_Q8.2.in.npy differ diff --git a/data/fixed_point/inv_sqrt_Q8.2.out.npy b/data/fixed_point/inv_sqrt_Q8.2.out.npy new file mode 100644 index 0000000..6bd25b2 Binary files /dev/null and b/data/fixed_point/inv_sqrt_Q8.2.out.npy differ diff --git a/data/dumps/log_Q8.3.in b/data/fixed_point/inv_sqrt_Q8.3.in.npy similarity index 78% rename from data/dumps/log_Q8.3.in rename to data/fixed_point/inv_sqrt_Q8.3.in.npy index 1482f64..afdb68f 100644 Binary files a/data/dumps/log_Q8.3.in and b/data/fixed_point/inv_sqrt_Q8.3.in.npy differ diff --git a/data/fixed_point/inv_sqrt_Q8.3.out.npy b/data/fixed_point/inv_sqrt_Q8.3.out.npy new file mode 100644 index 0000000..6b91115 Binary files /dev/null and b/data/fixed_point/inv_sqrt_Q8.3.out.npy differ diff --git a/data/dumps/log_Q8.4.in b/data/fixed_point/inv_sqrt_Q8.4.in.npy similarity index 80% rename from data/dumps/log_Q8.4.in rename to data/fixed_point/inv_sqrt_Q8.4.in.npy index 79026e9..f97b268 100644 Binary files a/data/dumps/log_Q8.4.in and b/data/fixed_point/inv_sqrt_Q8.4.in.npy differ diff --git a/data/fixed_point/inv_sqrt_Q8.4.out.npy b/data/fixed_point/inv_sqrt_Q8.4.out.npy new file mode 100644 index 
0000000..b1c92fe Binary files /dev/null and b/data/fixed_point/inv_sqrt_Q8.4.out.npy differ diff --git a/data/dumps/log_Q8.5.in b/data/fixed_point/inv_sqrt_Q8.5.in.npy similarity index 77% rename from data/dumps/log_Q8.5.in rename to data/fixed_point/inv_sqrt_Q8.5.in.npy index 7da7770..d0f8f4b 100644 Binary files a/data/dumps/log_Q8.5.in and b/data/fixed_point/inv_sqrt_Q8.5.in.npy differ diff --git a/data/fixed_point/inv_sqrt_Q8.5.out.npy b/data/fixed_point/inv_sqrt_Q8.5.out.npy new file mode 100644 index 0000000..8488b07 Binary files /dev/null and b/data/fixed_point/inv_sqrt_Q8.5.out.npy differ diff --git a/data/dumps/log_Q8.6.in b/data/fixed_point/inv_sqrt_Q8.6.in.npy similarity index 82% rename from data/dumps/log_Q8.6.in rename to data/fixed_point/inv_sqrt_Q8.6.in.npy index 51880eb..e0160de 100644 Binary files a/data/dumps/log_Q8.6.in and b/data/fixed_point/inv_sqrt_Q8.6.in.npy differ diff --git a/data/fixed_point/inv_sqrt_Q8.6.out.npy b/data/fixed_point/inv_sqrt_Q8.6.out.npy new file mode 100644 index 0000000..108ecfa Binary files /dev/null and b/data/fixed_point/inv_sqrt_Q8.6.out.npy differ diff --git a/data/dumps/log_Q8.7.in b/data/fixed_point/inv_sqrt_Q8.7.in.npy similarity index 63% rename from data/dumps/log_Q8.7.in rename to data/fixed_point/inv_sqrt_Q8.7.in.npy index c3ac477..e3260db 100644 Binary files a/data/dumps/log_Q8.7.in and b/data/fixed_point/inv_sqrt_Q8.7.in.npy differ diff --git a/data/fixed_point/inv_sqrt_Q8.7.out.npy b/data/fixed_point/inv_sqrt_Q8.7.out.npy new file mode 100644 index 0000000..eeb92aa Binary files /dev/null and b/data/fixed_point/inv_sqrt_Q8.7.out.npy differ diff --git a/data/fixed_point/log_Q8.1.in.npy b/data/fixed_point/log_Q8.1.in.npy new file mode 100644 index 0000000..25c04df Binary files /dev/null and b/data/fixed_point/log_Q8.1.in.npy differ diff --git a/data/dumps/log_Q8.1.out b/data/fixed_point/log_Q8.1.out.npy similarity index 76% rename from data/dumps/log_Q8.1.out rename to data/fixed_point/log_Q8.1.out.npy index 
028b82b..9ea6438 100644 Binary files a/data/dumps/log_Q8.1.out and b/data/fixed_point/log_Q8.1.out.npy differ diff --git a/data/fixed_point/log_Q8.2.in.npy b/data/fixed_point/log_Q8.2.in.npy new file mode 100644 index 0000000..97f5dff Binary files /dev/null and b/data/fixed_point/log_Q8.2.in.npy differ diff --git a/data/dumps/log_Q8.2.out b/data/fixed_point/log_Q8.2.out.npy similarity index 76% rename from data/dumps/log_Q8.2.out rename to data/fixed_point/log_Q8.2.out.npy index 2fc2f5d..b4eaf53 100644 Binary files a/data/dumps/log_Q8.2.out and b/data/fixed_point/log_Q8.2.out.npy differ diff --git a/data/fixed_point/log_Q8.3.in.npy b/data/fixed_point/log_Q8.3.in.npy new file mode 100644 index 0000000..afdb68f Binary files /dev/null and b/data/fixed_point/log_Q8.3.in.npy differ diff --git a/data/dumps/log_Q8.3.out b/data/fixed_point/log_Q8.3.out.npy similarity index 77% rename from data/dumps/log_Q8.3.out rename to data/fixed_point/log_Q8.3.out.npy index bba4a57..8df02f0 100644 Binary files a/data/dumps/log_Q8.3.out and b/data/fixed_point/log_Q8.3.out.npy differ diff --git a/data/fixed_point/log_Q8.4.in.npy b/data/fixed_point/log_Q8.4.in.npy new file mode 100644 index 0000000..f97b268 Binary files /dev/null and b/data/fixed_point/log_Q8.4.in.npy differ diff --git a/data/dumps/log_Q8.4.out b/data/fixed_point/log_Q8.4.out.npy similarity index 80% rename from data/dumps/log_Q8.4.out rename to data/fixed_point/log_Q8.4.out.npy index 44f9d9d..05f7368 100644 Binary files a/data/dumps/log_Q8.4.out and b/data/fixed_point/log_Q8.4.out.npy differ diff --git a/data/fixed_point/log_Q8.5.in.npy b/data/fixed_point/log_Q8.5.in.npy new file mode 100644 index 0000000..d0f8f4b Binary files /dev/null and b/data/fixed_point/log_Q8.5.in.npy differ diff --git a/data/dumps/log_Q8.5.out b/data/fixed_point/log_Q8.5.out.npy similarity index 84% rename from data/dumps/log_Q8.5.out rename to data/fixed_point/log_Q8.5.out.npy index 8d870ea..796e7f0 100644 Binary files a/data/dumps/log_Q8.5.out 
and b/data/fixed_point/log_Q8.5.out.npy differ diff --git a/data/fixed_point/log_Q8.6.in.npy b/data/fixed_point/log_Q8.6.in.npy new file mode 100644 index 0000000..e0160de Binary files /dev/null and b/data/fixed_point/log_Q8.6.in.npy differ diff --git a/data/dumps/log_Q8.6.out b/data/fixed_point/log_Q8.6.out.npy similarity index 82% rename from data/dumps/log_Q8.6.out rename to data/fixed_point/log_Q8.6.out.npy index b965ff1..4144038 100644 Binary files a/data/dumps/log_Q8.6.out and b/data/fixed_point/log_Q8.6.out.npy differ diff --git a/data/fixed_point/log_Q8.7.in.npy b/data/fixed_point/log_Q8.7.in.npy new file mode 100644 index 0000000..e3260db Binary files /dev/null and b/data/fixed_point/log_Q8.7.in.npy differ diff --git a/data/dumps/log_Q8.7.out b/data/fixed_point/log_Q8.7.out.npy similarity index 71% rename from data/dumps/log_Q8.7.out rename to data/fixed_point/log_Q8.7.out.npy index 859e5a1..02fc4c2 100644 Binary files a/data/dumps/log_Q8.7.out and b/data/fixed_point/log_Q8.7.out.npy differ diff --git a/data/dumps/minus_Q8.1.in b/data/fixed_point/minus_Q8.1.in.npy similarity index 89% rename from data/dumps/minus_Q8.1.in rename to data/fixed_point/minus_Q8.1.in.npy index aa631d2..b3765a7 100644 Binary files a/data/dumps/minus_Q8.1.in and b/data/fixed_point/minus_Q8.1.in.npy differ diff --git a/data/dumps/minus_Q8.1.out b/data/fixed_point/minus_Q8.1.out.npy similarity index 92% rename from data/dumps/minus_Q8.1.out rename to data/fixed_point/minus_Q8.1.out.npy index 06d7405..ad22160 100644 Binary files a/data/dumps/minus_Q8.1.out and b/data/fixed_point/minus_Q8.1.out.npy differ diff --git a/data/dumps/plus_Q8.2.in b/data/fixed_point/minus_Q8.2.in.npy similarity index 92% rename from data/dumps/plus_Q8.2.in rename to data/fixed_point/minus_Q8.2.in.npy index ca881ab..e215385 100644 Binary files a/data/dumps/plus_Q8.2.in and b/data/fixed_point/minus_Q8.2.in.npy differ diff --git a/data/dumps/minus_Q8.2.out b/data/fixed_point/minus_Q8.2.out.npy similarity index 
92% rename from data/dumps/minus_Q8.2.out rename to data/fixed_point/minus_Q8.2.out.npy index 06d7405..ad22160 100644 Binary files a/data/dumps/minus_Q8.2.out and b/data/fixed_point/minus_Q8.2.out.npy differ diff --git a/data/dumps/plus_Q8.3.in b/data/fixed_point/minus_Q8.3.in.npy similarity index 89% rename from data/dumps/plus_Q8.3.in rename to data/fixed_point/minus_Q8.3.in.npy index 082d665..23e80f1 100644 Binary files a/data/dumps/plus_Q8.3.in and b/data/fixed_point/minus_Q8.3.in.npy differ diff --git a/data/dumps/minus_Q8.3.out b/data/fixed_point/minus_Q8.3.out.npy similarity index 92% rename from data/dumps/minus_Q8.3.out rename to data/fixed_point/minus_Q8.3.out.npy index 06d7405..ad22160 100644 Binary files a/data/dumps/minus_Q8.3.out and b/data/fixed_point/minus_Q8.3.out.npy differ diff --git a/data/dumps/minus_Q8.4.in b/data/fixed_point/minus_Q8.4.in.npy similarity index 92% rename from data/dumps/minus_Q8.4.in rename to data/fixed_point/minus_Q8.4.in.npy index cd5035e..a7033d9 100644 Binary files a/data/dumps/minus_Q8.4.in and b/data/fixed_point/minus_Q8.4.in.npy differ diff --git a/data/dumps/minus_Q8.4.out b/data/fixed_point/minus_Q8.4.out.npy similarity index 92% rename from data/dumps/minus_Q8.4.out rename to data/fixed_point/minus_Q8.4.out.npy index 06d7405..ad22160 100644 Binary files a/data/dumps/minus_Q8.4.out and b/data/fixed_point/minus_Q8.4.out.npy differ diff --git a/data/dumps/minus_Q8.5.in b/data/fixed_point/minus_Q8.5.in.npy similarity index 89% rename from data/dumps/minus_Q8.5.in rename to data/fixed_point/minus_Q8.5.in.npy index bf93114..e4d554e 100644 Binary files a/data/dumps/minus_Q8.5.in and b/data/fixed_point/minus_Q8.5.in.npy differ diff --git a/data/fixed_point/minus_Q8.5.out.npy b/data/fixed_point/minus_Q8.5.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/minus_Q8.5.out.npy differ diff --git a/data/dumps/mul_Q8.6.in b/data/fixed_point/minus_Q8.6.in.npy similarity index 92% 
rename from data/dumps/mul_Q8.6.in rename to data/fixed_point/minus_Q8.6.in.npy index 454a381..8acb0f5 100644 Binary files a/data/dumps/mul_Q8.6.in and b/data/fixed_point/minus_Q8.6.in.npy differ diff --git a/data/fixed_point/minus_Q8.6.out.npy b/data/fixed_point/minus_Q8.6.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/minus_Q8.6.out.npy differ diff --git a/data/dumps/mul_Q8.7.in b/data/fixed_point/minus_Q8.7.in.npy similarity index 89% rename from data/dumps/mul_Q8.7.in rename to data/fixed_point/minus_Q8.7.in.npy index ce6282b..6a9bb9a 100644 Binary files a/data/dumps/mul_Q8.7.in and b/data/fixed_point/minus_Q8.7.in.npy differ diff --git a/data/fixed_point/minus_Q8.7.out.npy b/data/fixed_point/minus_Q8.7.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/minus_Q8.7.out.npy differ diff --git a/data/dumps/plus_Q8.1.in b/data/fixed_point/mul_Q8.1.in.npy similarity index 89% rename from data/dumps/plus_Q8.1.in rename to data/fixed_point/mul_Q8.1.in.npy index aa631d2..b3765a7 100644 Binary files a/data/dumps/plus_Q8.1.in and b/data/fixed_point/mul_Q8.1.in.npy differ diff --git a/data/fixed_point/mul_Q8.1.out.npy b/data/fixed_point/mul_Q8.1.out.npy new file mode 100644 index 0000000..1a501ab Binary files /dev/null and b/data/fixed_point/mul_Q8.1.out.npy differ diff --git a/data/dumps/minus_Q8.2.in b/data/fixed_point/mul_Q8.2.in.npy similarity index 92% rename from data/dumps/minus_Q8.2.in rename to data/fixed_point/mul_Q8.2.in.npy index ca881ab..e215385 100644 Binary files a/data/dumps/minus_Q8.2.in and b/data/fixed_point/mul_Q8.2.in.npy differ diff --git a/data/fixed_point/mul_Q8.2.out.npy b/data/fixed_point/mul_Q8.2.out.npy new file mode 100644 index 0000000..9b221e6 Binary files /dev/null and b/data/fixed_point/mul_Q8.2.out.npy differ diff --git a/data/dumps/minus_Q8.3.in b/data/fixed_point/mul_Q8.3.in.npy similarity index 89% rename from data/dumps/minus_Q8.3.in 
rename to data/fixed_point/mul_Q8.3.in.npy index 082d665..23e80f1 100644 Binary files a/data/dumps/minus_Q8.3.in and b/data/fixed_point/mul_Q8.3.in.npy differ diff --git a/data/fixed_point/mul_Q8.3.out.npy b/data/fixed_point/mul_Q8.3.out.npy new file mode 100644 index 0000000..0094c7a Binary files /dev/null and b/data/fixed_point/mul_Q8.3.out.npy differ diff --git a/data/fixed_point/mul_Q8.4.in.npy b/data/fixed_point/mul_Q8.4.in.npy new file mode 100644 index 0000000..a7033d9 Binary files /dev/null and b/data/fixed_point/mul_Q8.4.in.npy differ diff --git a/data/fixed_point/mul_Q8.4.out.npy b/data/fixed_point/mul_Q8.4.out.npy new file mode 100644 index 0000000..98ab823 Binary files /dev/null and b/data/fixed_point/mul_Q8.4.out.npy differ diff --git a/data/dumps/exp_Q8.5.in b/data/fixed_point/mul_Q8.5.in.npy similarity index 65% rename from data/dumps/exp_Q8.5.in rename to data/fixed_point/mul_Q8.5.in.npy index 0f8fe1e..e4d554e 100644 Binary files a/data/dumps/exp_Q8.5.in and b/data/fixed_point/mul_Q8.5.in.npy differ diff --git a/data/fixed_point/mul_Q8.5.out.npy b/data/fixed_point/mul_Q8.5.out.npy new file mode 100644 index 0000000..b66f393 Binary files /dev/null and b/data/fixed_point/mul_Q8.5.out.npy differ diff --git a/data/dumps/plus_Q8.6.in b/data/fixed_point/mul_Q8.6.in.npy similarity index 92% rename from data/dumps/plus_Q8.6.in rename to data/fixed_point/mul_Q8.6.in.npy index 454a381..8acb0f5 100644 Binary files a/data/dumps/plus_Q8.6.in and b/data/fixed_point/mul_Q8.6.in.npy differ diff --git a/data/dumps/mul_Q8.6.out b/data/fixed_point/mul_Q8.6.out.npy similarity index 57% rename from data/dumps/mul_Q8.6.out rename to data/fixed_point/mul_Q8.6.out.npy index 9c05a13..72c9546 100644 Binary files a/data/dumps/mul_Q8.6.out and b/data/fixed_point/mul_Q8.6.out.npy differ diff --git a/data/dumps/plus_Q8.7.in b/data/fixed_point/mul_Q8.7.in.npy similarity index 89% rename from data/dumps/plus_Q8.7.in rename to data/fixed_point/mul_Q8.7.in.npy index ce6282b..6a9bb9a 
100644 Binary files a/data/dumps/plus_Q8.7.in and b/data/fixed_point/mul_Q8.7.in.npy differ diff --git a/data/dumps/mul_Q8.7.out b/data/fixed_point/mul_Q8.7.out.npy similarity index 86% rename from data/dumps/mul_Q8.7.out rename to data/fixed_point/mul_Q8.7.out.npy index 85a6744..732e916 100644 Binary files a/data/dumps/mul_Q8.7.out and b/data/fixed_point/mul_Q8.7.out.npy differ diff --git a/data/fixed_point/plus_Q8.1.in.npy b/data/fixed_point/plus_Q8.1.in.npy new file mode 100644 index 0000000..b3765a7 Binary files /dev/null and b/data/fixed_point/plus_Q8.1.in.npy differ diff --git a/data/dumps/plus_Q8.1.out b/data/fixed_point/plus_Q8.1.out.npy similarity index 92% rename from data/dumps/plus_Q8.1.out rename to data/fixed_point/plus_Q8.1.out.npy index 893f8c0..6a0e47e 100644 Binary files a/data/dumps/plus_Q8.1.out and b/data/fixed_point/plus_Q8.1.out.npy differ diff --git a/data/fixed_point/plus_Q8.2.in.npy b/data/fixed_point/plus_Q8.2.in.npy new file mode 100644 index 0000000..e215385 Binary files /dev/null and b/data/fixed_point/plus_Q8.2.in.npy differ diff --git a/data/dumps/plus_Q8.2.out b/data/fixed_point/plus_Q8.2.out.npy similarity index 92% rename from data/dumps/plus_Q8.2.out rename to data/fixed_point/plus_Q8.2.out.npy index 22d51f9..9c16a97 100644 Binary files a/data/dumps/plus_Q8.2.out and b/data/fixed_point/plus_Q8.2.out.npy differ diff --git a/data/fixed_point/plus_Q8.3.in.npy b/data/fixed_point/plus_Q8.3.in.npy new file mode 100644 index 0000000..23e80f1 Binary files /dev/null and b/data/fixed_point/plus_Q8.3.in.npy differ diff --git a/data/dumps/plus_Q8.3.out b/data/fixed_point/plus_Q8.3.out.npy similarity index 92% rename from data/dumps/plus_Q8.3.out rename to data/fixed_point/plus_Q8.3.out.npy index 56ad48b..b957f2a 100644 Binary files a/data/dumps/plus_Q8.3.out and b/data/fixed_point/plus_Q8.3.out.npy differ diff --git a/data/fixed_point/plus_Q8.4.in.npy b/data/fixed_point/plus_Q8.4.in.npy new file mode 100644 index 0000000..a7033d9 Binary 
files /dev/null and b/data/fixed_point/plus_Q8.4.in.npy differ diff --git a/data/dumps/plus_Q8.4.out b/data/fixed_point/plus_Q8.4.out.npy similarity index 92% rename from data/dumps/plus_Q8.4.out rename to data/fixed_point/plus_Q8.4.out.npy index 08eddea..544fab0 100644 Binary files a/data/dumps/plus_Q8.4.out and b/data/fixed_point/plus_Q8.4.out.npy differ diff --git a/data/fixed_point/plus_Q8.5.in.npy b/data/fixed_point/plus_Q8.5.in.npy new file mode 100644 index 0000000..e4d554e Binary files /dev/null and b/data/fixed_point/plus_Q8.5.in.npy differ diff --git a/data/dumps/plus_Q8.5.out b/data/fixed_point/plus_Q8.5.out.npy similarity index 92% rename from data/dumps/plus_Q8.5.out rename to data/fixed_point/plus_Q8.5.out.npy index 49d2868..d93230b 100644 Binary files a/data/dumps/plus_Q8.5.out and b/data/fixed_point/plus_Q8.5.out.npy differ diff --git a/data/fixed_point/plus_Q8.6.in.npy b/data/fixed_point/plus_Q8.6.in.npy new file mode 100644 index 0000000..8acb0f5 Binary files /dev/null and b/data/fixed_point/plus_Q8.6.in.npy differ diff --git a/data/dumps/plus_Q8.6.out b/data/fixed_point/plus_Q8.6.out.npy similarity index 92% rename from data/dumps/plus_Q8.6.out rename to data/fixed_point/plus_Q8.6.out.npy index 5441c6f..fc498ed 100644 Binary files a/data/dumps/plus_Q8.6.out and b/data/fixed_point/plus_Q8.6.out.npy differ diff --git a/data/fixed_point/plus_Q8.7.in.npy b/data/fixed_point/plus_Q8.7.in.npy new file mode 100644 index 0000000..6a9bb9a Binary files /dev/null and b/data/fixed_point/plus_Q8.7.in.npy differ diff --git a/data/dumps/plus_Q8.7.out b/data/fixed_point/plus_Q8.7.out.npy similarity index 92% rename from data/dumps/plus_Q8.7.out rename to data/fixed_point/plus_Q8.7.out.npy index 0717fd9..1b4d59a 100644 Binary files a/data/dumps/plus_Q8.7.out and b/data/fixed_point/plus_Q8.7.out.npy differ diff --git a/data/fixed_point/sub_Q8.1.in.npy b/data/fixed_point/sub_Q8.1.in.npy new file mode 100644 index 0000000..b3765a7 Binary files /dev/null and 
b/data/fixed_point/sub_Q8.1.in.npy differ diff --git a/data/fixed_point/sub_Q8.1.out.npy b/data/fixed_point/sub_Q8.1.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/sub_Q8.1.out.npy differ diff --git a/data/fixed_point/sub_Q8.2.in.npy b/data/fixed_point/sub_Q8.2.in.npy new file mode 100644 index 0000000..e215385 Binary files /dev/null and b/data/fixed_point/sub_Q8.2.in.npy differ diff --git a/data/fixed_point/sub_Q8.2.out.npy b/data/fixed_point/sub_Q8.2.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/sub_Q8.2.out.npy differ diff --git a/data/fixed_point/sub_Q8.3.in.npy b/data/fixed_point/sub_Q8.3.in.npy new file mode 100644 index 0000000..23e80f1 Binary files /dev/null and b/data/fixed_point/sub_Q8.3.in.npy differ diff --git a/data/fixed_point/sub_Q8.3.out.npy b/data/fixed_point/sub_Q8.3.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/sub_Q8.3.out.npy differ diff --git a/data/fixed_point/sub_Q8.4.in.npy b/data/fixed_point/sub_Q8.4.in.npy new file mode 100644 index 0000000..a7033d9 Binary files /dev/null and b/data/fixed_point/sub_Q8.4.in.npy differ diff --git a/data/fixed_point/sub_Q8.4.out.npy b/data/fixed_point/sub_Q8.4.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/sub_Q8.4.out.npy differ diff --git a/data/fixed_point/sub_Q8.5.in.npy b/data/fixed_point/sub_Q8.5.in.npy new file mode 100644 index 0000000..e4d554e Binary files /dev/null and b/data/fixed_point/sub_Q8.5.in.npy differ diff --git a/data/fixed_point/sub_Q8.5.out.npy b/data/fixed_point/sub_Q8.5.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/sub_Q8.5.out.npy differ diff --git a/data/fixed_point/sub_Q8.6.in.npy b/data/fixed_point/sub_Q8.6.in.npy new file mode 100644 index 0000000..8acb0f5 Binary files /dev/null and b/data/fixed_point/sub_Q8.6.in.npy differ 
diff --git a/data/fixed_point/sub_Q8.6.out.npy b/data/fixed_point/sub_Q8.6.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/sub_Q8.6.out.npy differ diff --git a/data/fixed_point/sub_Q8.7.in.npy b/data/fixed_point/sub_Q8.7.in.npy new file mode 100644 index 0000000..6a9bb9a Binary files /dev/null and b/data/fixed_point/sub_Q8.7.in.npy differ diff --git a/data/fixed_point/sub_Q8.7.out.npy b/data/fixed_point/sub_Q8.7.out.npy new file mode 100644 index 0000000..ad22160 Binary files /dev/null and b/data/fixed_point/sub_Q8.7.out.npy differ diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 2b6ddfb..f543ab6 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -155,6 +155,17 @@ If there is more than one release in a month then an extra sequential number is @subsection S2_2_changelog Changelog +v17.10 Public maintenance release + - Bug fixes: + - Check the maximum local workgroup size supported by OpenCL devices + - Minor documentation updates (Fixed instructions to build the examples) + - Introduced an arm_compute::graph::GraphContext + - Added a few new Graph nodes and support for grouping. + - Automatically enable cl_printf in debug builds + - Fixed bare metal builds for armv7a + - Added AlexNet and cartoon effect examples + - Fixed library builds: libraries are no longer built as supersets of each other. (It means applications using the Runtime part of the library now need to link against both libarm_compute_core and libarm_compute) + v17.09 Public major release - Experimental Graph support: initial implementation of a simple stream API to easily chain machine learning layers. 
- Memory Manager (@ref arm_compute::BlobLifetimeManager, @ref arm_compute::BlobMemoryPool, @ref arm_compute::ILifetimeManager, @ref arm_compute::IMemoryGroup, @ref arm_compute::IMemoryManager, @ref arm_compute::IMemoryPool, @ref arm_compute::IPoolManager, @ref arm_compute::MemoryManagerOnDemand, @ref arm_compute::PoolManager) @@ -480,38 +491,63 @@ The examples get automatically built by scons as part of the build process of th To cross compile a NEON example for Linux 32bit: - arm-linux-gnueabihf-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -mfpu=neon -L. -larm_compute -o neon_convolution + arm-linux-gnueabihf-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -mfpu=neon -L. -larm_compute -larm_compute_core -o neon_convolution To cross compile a NEON example for Linux 64bit: - aarch64-linux-gnu-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -L. -larm_compute -o neon_convolution + aarch64-linux-gnu-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -L. -larm_compute -larm_compute_core -o neon_convolution (notice the only difference with the 32 bit command is that we don't need the -mfpu option and the compiler's name is different) To cross compile an OpenCL example for Linux 32bit: - arm-linux-gnueabihf-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -mfpu=neon -L. -larm_compute -lOpenCL -o cl_convolution -DARM_COMPUTE_CL + arm-linux-gnueabihf-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -mfpu=neon -L. -larm_compute -larm_compute_core -lOpenCL -o cl_convolution -DARM_COMPUTE_CL To cross compile an OpenCL example for Linux 64bit: - aarch64-linux-gnu-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -L. -larm_compute -lOpenCL -o cl_convolution -DARM_COMPUTE_CL + aarch64-linux-gnu-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -L. 
-larm_compute -larm_compute_core -lOpenCL -o cl_convolution -DARM_COMPUTE_CL + +(notice the only difference with the 32 bit command is that we don't need the -mfpu option and the compiler's name is different) + +To cross compile the examples with the Graph API, such as graph_lenet.cpp, you need to link the library arm_compute_graph.so also. +(notice the compute library has to be built with both neon and opencl enabled - neon=1 and opencl=1) + +i.e. to cross compile the "graph_lenet" example for Linux 32bit: + + arm-linux-gnueabihf-g++ examples/graph_lenet.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -mfpu=neon -L. -larm_compute_graph -larm_compute -larm_compute_core -lOpenCL -o graph_lenet -DARM_COMPUTE_CL + +i.e. to cross compile the "graph_lenet" example for Linux 64bit: + + aarch64-linux-gnu-g++ examples/graph_lenet.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -L. -larm_compute_graph -larm_compute -larm_compute_core -lOpenCL -o graph_lenet -DARM_COMPUTE_CL (notice the only difference with the 32 bit command is that we don't need the -mfpu option and the compiler's name is different) To compile natively (i.e directly on an ARM device) for NEON for Linux 32bit: - g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -mfpu=neon -larm_compute -o neon_convolution + g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -mfpu=neon -larm_compute -larm_compute_core -o neon_convolution To compile natively (i.e directly on an ARM device) for NEON for Linux 64bit: - g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -larm_compute -o neon_convolution + g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute -larm_compute_core -o neon_convolution (notice the only difference with the 32 bit command is that we don't need the -mfpu option) To compile natively (i.e directly on an ARM device) for OpenCL for Linux 32bit or Linux 64bit: - g++ examples/cl_convolution.cpp utils/Utils.cpp -I. 
-Iinclude -std=c++11 -larm_compute -lOpenCL -o cl_convolution -DARM_COMPUTE_CL + g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute -larm_compute_core -lOpenCL -o cl_convolution -DARM_COMPUTE_CL + +To compile natively (i.e directly on an ARM device) the examples with the Graph API, such as graph_lenet.cpp, you need to link the library arm_compute_graph.so also. +(notice the compute library has to be built with both neon and opencl enabled - neon=1 and opencl=1) + +i.e. to compile natively the "graph_lenet" example for Linux 32bit: + + g++ examples/graph_lenet.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -mfpu=neon -L. -larm_compute_graph -larm_compute -larm_compute_core -lOpenCL -o graph_lenet -DARM_COMPUTE_CL +i.e. to compile natively the "graph_lenet" example for Linux 64bit: + + g++ examples/graph_lenet.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -L. -larm_compute_graph -larm_compute -larm_compute_core -lOpenCL -o graph_lenet -DARM_COMPUTE_CL + +(notice the only difference with the 32 bit command is that we don't need the -mfpu option) @note These two commands assume libarm_compute.so is available in your library path, if not add the path to it using -L @@ -568,16 +604,24 @@ Once you've got your Android standalone toolchain built and added to your path y To cross compile a NEON example: #32 bit: - arm-linux-androideabi-clang++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o neon_convolution_arm -static-libstdc++ -pie + arm-linux-androideabi-clang++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -larm_compute_core-static -L. -o neon_convolution_arm -static-libstdc++ -pie #64 bit: - aarch64-linux-android-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o neon_convolution_aarch64 -static-libstdc++ -pie + aarch64-linux-android-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. 
-Iinclude -std=c++11 -larm_compute-static -larm_compute_core-static -L. -o neon_convolution_aarch64 -static-libstdc++ -pie To cross compile an OpenCL example: #32 bit: - arm-linux-androideabi-clang++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o cl_convolution_arm -static-libstdc++ -pie -lOpenCL -DARM_COMPUTE_CL + arm-linux-androideabi-clang++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -larm_compute_core-static -L. -o cl_convolution_arm -static-libstdc++ -pie -lOpenCL -DARM_COMPUTE_CL #64 bit: - aarch64-linux-android-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o cl_convolution_aarch64 -static-libstdc++ -pie -lOpenCL -DARM_COMPUTE_CL + aarch64-linux-android-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute-static -larm_compute_core-static -L. -o cl_convolution_aarch64 -static-libstdc++ -pie -lOpenCL -DARM_COMPUTE_CL + +To cross compile the examples with the Graph API, such as graph_lenet.cpp, you need to link the library arm_compute_graph also. +(notice the compute library has to be built with both neon and opencl enabled - neon=1 and opencl=1) + + #32 bit: + arm-linux-androideabi-clang++ examples/graph_lenet.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute_graph-static -larm_compute-static -larm_compute_core-static -L. -o graph_lenet_arm -static-libstdc++ -pie -lOpenCL -DARM_COMPUTE_CL + #64 bit: + aarch64-linux-android-g++ examples/graph_lenet.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute_graph-static -larm_compute-static -larm_compute_core-static -L. -o graph_lenet_aarch64 -static-libstdc++ -pie -lOpenCL -DARM_COMPUTE_CL @note Due to some issues in older versions of the Mali OpenCL DDK (<= r13p0), we recommend to link arm_compute statically on Android. 
@@ -603,14 +647,34 @@ And finally to run the example: adb shell /data/local/tmp/neon_convolution_aarch64 adb shell /data/local/tmp/cl_convolution_aarch64 -@subsection S3_4_windows_host Building on a Windows host system +@subsection S3_4_bare_metal Building for bare metal + +For bare metal, the library was successfully built using Linaro's latest (gcc-linaro-6.3.1-2017.05) bare metal toolchains: + - arm-eabi for armv7a + - aarch64-elf for arm64-v8a + +Download linaro for armv7a and arm64-v8a. + +@note Make sure to add the toolchains to your PATH: export PATH=$PATH:$MY_TOOLCHAINS/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-elf/bin:$MY_TOOLCHAINS/gcc-linaro-6.3.1-2017.05-x86_64_arm-eabi/bin + +@subsubsection S3_4_1_library How to build the library ? + +To cross-compile the library with NEON support for bare metal arm64-v8a: + + scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=bare_metal arch=arm64-v8a build=cross_compile cppthreads=0 openmp=0 standalone=1 + +@subsubsection S3_4_2_examples How to manually build the examples ? + +Examples are disabled when building for bare metal. If you want to build the examples you need to provide a custom bootcode depending on the target architecture and link against the compute library. More information about bare metal bootcode can be found here. + +@subsection S3_5_windows_host Building on a Windows host system Using `scons` directly from the Windows command line is known to cause problems. The reason seems to be that if `scons` is setup for cross-compilation it gets confused about Windows style paths (using backslashes). Thus it is recommended to follow one of the options outlined below. -@subsubsection S3_4_1_ubuntu_on_windows Bash on Ubuntu on Windows +@subsubsection S3_5_1_ubuntu_on_windows Bash on Ubuntu on Windows The best and easiest option is to use Ubuntu on Windows. @@ -618,7 +682,7 @@ This feature is still marked as *beta* and thus might not be available.
However, if it is building the library is as simple as opening a *Bash on Ubuntu on Windows* shell and following the general guidelines given above. -@subsubsection S3_4_2_cygwin Cygwin +@subsubsection S3_5_2_cygwin Cygwin If the Windows subsystem for Linux is not available Cygwin can be used to install and run `scons`. In addition to the default packages @@ -631,7 +695,7 @@ compiler is included in the Android standalone toolchain. After everything has been set up in the Cygwin terminal the general guide on building the library can be followed. -@subsection S3_5_cl_stub_library The OpenCL stub library +@subsection S3_6_cl_stub_library The OpenCL stub library In the opencl-1.2-stubs folder you will find the sources to build a stub OpenCL library which then can be used to link your application or arm_compute against. diff --git a/docs/02_tests.dox b/docs/02_tests.dox index c39431f..209acd6 100644 --- a/docs/02_tests.dox +++ b/docs/02_tests.dox @@ -315,6 +315,23 @@ As an alternative output format JSON is supported and can be selected via `--log-format=json`. To write the output to a file instead of stdout the `--log-file` option can be used. +@subsubsection tests_running_tests_benchmarking_mode Mode +Tests contain different datasets of different sizes, some of which will take several hours to run. +You can select which datasets to use by using the `--mode` option, we recommend you use `--mode=precommit` to start with. + +@subsubsection tests_running_tests_benchmarking_instruments Instruments +You can use the `--instruments` option to select one or more instruments to measure the execution time of the benchmark tests. + +`PMU` will try to read the CPU PMU events from the kernel (They need to be enabled on your platform) + +`MALI` will try to collect Mali hardware performance counters. (You need to have a recent enough Mali driver) + +`WALL_CLOCK` will measure time using `gettimeofday`: this should work on all platforms.
+ +You can pass a combination of these instruments: `--instruments=PMU,MALI,WALL_CLOCK` + +@note You need to make sure the instruments have been selected at compile time using the `pmu=1` or `mali=1` scons options. + @subsection tests_running_tests_validation Validation @note The new validation tests have the same interface as the benchmarking tests. diff --git a/docs/Doxyfile b/docs/Doxyfile index ee50981..c0713d0 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "Compute Library" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 17.09 +PROJECT_NUMBER = 17.10 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/documentation/00__introduction_8dox.xhtml b/documentation/00__introduction_8dox.xhtml index a10c288..46d090e 100644 --- a/documentation/00__introduction_8dox.xhtml +++ b/documentation/00__introduction_8dox.xhtml @@ -38,7 +38,7 @@
Compute Library -  17.09 +  17.10
@@ -121,7 +121,7 @@ $(document).ready(function(){initNavTree('00__introduction_8dox.xhtml','');});