// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*! @mainpage clDNN Documentation
* @section intro Introduction
* Compute Library for Deep Neural Networks (clDNN) is middleware software
* for accelerating DNN inference on Intel® HD and Iris™ Pro Graphics.
* The project includes CNN primitive implementations on Intel GPUs with C and C++ interfaces.
*
* The clDNN Library implements a set of primitives, including:
* - Fully connected (inner product)
* - Activation functions:
*  * rectified linear unit (ReLU)
*  * softplus (softReLU)
* - Depth concatenation
*
* With this primitive set, users can build and execute the most common image recognition, semantic segmentation and object detection network topologies, such as:
* @section model Programming Model
* Intel® clDNN is a graph-oriented library. To execute a CNN you build a topology, compile it, and run it to get results.
*
* <B> Terminology: </B>
* - Primitive - basic DNN functionality, e.g. convolution, pooling, softmax.
* - Data - special primitive type representing primitive parameters (weights and biases), inputs and outputs.
* - Engine - type of accelerator that executes the network. Currently the ocl (OpenCL) engine is the only one available.
* - Topology - container of primitives, data, and the relations between them. A topology represents a graph.
* - Program - optional step between Topology and Network. It is a compiled Topology without memory allocation.
* - Network - compiled Topology with memory allocated, ready to be executed. During compilation, build parameters trigger special optimizations like fusing and data reordering.
*
* <B> Execution Steps: </B>
*
* \image html workflow.jpg
*
* -# Declare or define primitive parameters (weights and biases) if needed.
* -# Create primitives. Each primitive must be given a name; a primitive references its inputs by the names of the primitives whose outputs feed it. A name can be used before the corresponding primitive is defined.
* -# Add primitives to the topology.
* -# Build a network from the topology.
* @section graph_compilation Graph compilation
*
* If the user chooses the build option optimize_data when the program is created (explicitly, or implicitly during network creation), clDNN performs graph optimizations as follows:
* * <B> Stage 0: Graph initiation:</B>
* * build nodes from primitives
* * replace each split node with a series of crop nodes. The name of each crop primitive is the concatenation of the split and port names.
* * replace an upsampling node with a deconvolution node if the upsampling mode is bilinear.
* * set outputs - mark nodes that are defined by the user as outputs (this blocks fusing etc.) or that have no users (leaves).
* * calculate processing order - use DFS on the graph to establish the processing order.
* * <B> Stage 1: Priorboxes:</B>
* * priorbox is a primitive that is executed during network compilation, so its node is removed from network execution.
* * <B> Stage 2: Graph analysis:</B>
* * <B> Stage 3: Trimming:</B>
* * apply backward BFS from each output to find unnecessary nodes/branches, then remove them.
* * <B> Stage 4: Inputs and biases:</B>
* * reorder input - the format of each convolution's input/output is selected.
* * reorder biases for convolution, fully connected and deconvolution nodes.
* * <B> Stage 5: Redundant reorders:</B>
* * previous stages can introduce additional reorders due to per-primitive format changes. This stage removes redundant reorders and fuses series of reorders into one.
* * <B> Stage 6: Constant propagation:</B>
* * prepare padding - goes through all primitives and checks whether a primitive's user requires padding; if so, sets its output padding.
* * prepare depthwise separable opt - if the split parameter is greater than 16 and the number of IFMs <= 8*split in a convolution or deconvolution, this stage changes execution from multiple kernels into one.
* * constant propagation - replace constant nodes that are not outputs with data nodes. A constant primitive is one that does not depend on any non-constant primitive and does not have to be executed: priorbox, data.
* * <B> Stage 7: Fusing:</B>
* * concat - if a concatenation is the only user of its dependencies, remove the concat node and set proper output paddings in every dependency.
* * crop - if a crop has only one dependency, and its users do not require padding, remove the crop and set proper output padding in its dependency.
* * reorder - if the primitive before a reorder supports different input vs. output types, the reorder can be fused with the previous node.
* * primitive fusing - currently this stage fuses an activation node with the previous node only, and only if the previous node supports activation fusing.
* * <B> Stage 8: Compile graph:</B>
* * at this stage, using the kernel selector, the graph chooses the best kernel implementation for each node.
* * <B> Stage 9: Reorder weights:</B>
* * at this stage weights are converted into the format suitable for the selected kernel implementation.
* * <B> Stage 10 & 11: Redundant reorders and constant propagation:</B>
* * check again that whole-graph compilation did not introduce any redundant reorders or constants.
* * <B> Stage 12: Compile program:</B>
* * at this stage the engine compiles cl_kernels.
* @section example C++ API Example MNIST network
* @include example_cldnn.cpp
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once

#include <cstdint>
#include <functional>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "../C/cldnn.h"
// There is no portable half precision floating point support.
// Using wrapped integral type with the same size and alignment restrictions.
namespace cldnn
{
struct half_impl
{
    half_impl() = default;

    template <typename T, typename = typename std::enable_if<!std::is_floating_point<T>::value>::type>
    explicit half_impl(T data) : _data(data) {}

    operator uint16_t() const { return _data; }
    operator float() const
    {
        cldnn_status status = CLDNN_SUCCESS;
        auto value = cldnn_half_to_float(_data, &status);
        if (status != CLDNN_SUCCESS)
            throw std::runtime_error("Conversion from half failed");
        return value;
    }

    explicit half_impl(float value)
    {
        cldnn_status status = CLDNN_SUCCESS;
        _data = cldnn_float_to_half(value, &status);
        if (status != CLDNN_SUCCESS)
            throw std::runtime_error("Conversion to half failed");
    }

private:
    uint16_t _data;
};
}

// Use complete implementation if necessary.
#if defined HALF_HALF_HPP
typedef half half_t;
#else
typedef cldnn::half_impl half_t;
#endif
namespace cldnn {

/// @addtogroup cpp_api C++ API
/// @defgroup cpp_error Error Handling

using status_t = ::cldnn_status;

/// @brief clDNN specific exception type.
class error : public std::runtime_error
{
public:
    explicit error(const std::string& _Message, status_t status = CLDNN_ERROR)
        : runtime_error(_Message), _status(status) {}

    explicit error(const char* _Message, status_t status = CLDNN_ERROR)
        : runtime_error(_Message), _status(status) {}

    /// @brief Returns clDNN status code.
    const status_t& status() const { return _status; }

private:
    status_t _status;
};

#define CLDNN_THROW(msg, status) throw cldnn::error(msg, status);
template <typename T>
T check_status(std::string err_msg, std::function<T(status_t*)> func)
{
    status_t status = CLDNN_SUCCESS;
    auto result = func(&status);
    if (status != CLDNN_SUCCESS)
        CLDNN_THROW(err_msg.append(": ").append(cldnn_get_last_error_message()), status);
    return result;
}

template <>
inline void check_status<void>(std::string err_msg, std::function<void(status_t*)> func)
{
    status_t status = CLDNN_SUCCESS;
    func(&status);
    if (status != CLDNN_SUCCESS)
        CLDNN_THROW(err_msg.append(": ").append(cldnn_get_last_error_message()), status);
}
/// @defgroup cpp_version Version Information

using version_t = ::cldnn_version;

/// @brief Get information about version of clDNN.
inline version_t get_version()
{
    return check_status<version_t>("get_version: fetching version information failed",
        [](status_t* status) { return ::cldnn_get_version(status); });
}
/// @cond CPP_HELPERS

/// @defgroup cpp_helpers Helpers

#define CLDNN_API_CLASS(the_class) static_assert(std::is_standard_layout<the_class>::value, #the_class " has to be 'standard layout' class");

template <typename T>
typename std::enable_if<std::is_integral<T>::value, T>::type align_to(T size, size_t align) {
    return static_cast<T>((size % align == 0) ? size : size - size % align + align);
}

template <typename T>
typename std::enable_if<std::is_integral<T>::value, T>::type pad_to(T size, size_t align) {
    return static_cast<T>((size % align == 0) ? 0 : align - size % align);
}

template <typename T>
typename std::enable_if<std::is_integral<T>::value, bool>::type is_aligned_to(T size, size_t align) {
    return !(size % align);
}
/// Computes ceil(@p val / @p divider) on unsigned integral numbers.
///
/// Computes division of unsigned integral numbers and rounds the result up to a full number (ceiling).
/// The function works for unsigned integrals only. Signed integrals are converted to corresponding
/// unsigned types before use.
///
/// @tparam T1 Type of @p val. Type must be integral (SFINAE).
/// @tparam T2 Type of @p divider. Type must be integral (SFINAE).
///
/// @param val Divided value. If value is signed, it will be converted to corresponding unsigned type.
/// @param divider Divider value. If value is signed, it will be converted to corresponding unsigned type.
///
/// @return Result of ceil(@p val / @p divider). The type of result is determined as if in normal integral
/// division, except each operand is converted to unsigned type if necessary.
template <typename T1, typename T2>
constexpr auto ceil_div(T1 val, T2 divider)
    -> typename std::enable_if<std::is_integral<T1>::value && std::is_integral<T2>::value,
                               decltype(std::declval<typename std::make_unsigned<T1>::type>() / std::declval<typename std::make_unsigned<T2>::type>())>::type
{
    typedef typename std::make_unsigned<T1>::type UT1;
    typedef typename std::make_unsigned<T2>::type UT2;
    typedef decltype(std::declval<UT1>() / std::declval<UT2>()) RetT;

    return static_cast<RetT>((static_cast<UT1>(val) + static_cast<UT2>(divider) - 1U) / static_cast<UT2>(divider));
}
/// Rounds @p val up to the nearest multiple of @p rounding that is greater than or equal to @p val.
///
/// The function works for unsigned integrals only. Signed integrals are converted to corresponding
/// unsigned types before use.
///
/// @tparam T1 Type of @p val. Type must be integral (SFINAE).
/// @tparam T2 Type of @p rounding. Type must be integral (SFINAE).
///
/// @param val Value to round up. If value is signed, it will be converted to corresponding unsigned type.
/// @param rounding Rounding value. If value is signed, it will be converted to corresponding unsigned type.
///
/// @return @p val rounded up to the nearest multiple of @p rounding. The type of result is determined as if in normal integral
/// division, except each operand is converted to unsigned type if necessary.
template <typename T1, typename T2>
constexpr auto round_up_to(T1 val, T2 rounding)
    -> typename std::enable_if<std::is_integral<T1>::value && std::is_integral<T2>::value,
                               decltype(std::declval<typename std::make_unsigned<T1>::type>() / std::declval<typename std::make_unsigned<T2>::type>())>::type
{
    typedef typename std::make_unsigned<T1>::type UT1;
    typedef typename std::make_unsigned<T2>::type UT2;
    typedef decltype(std::declval<UT1>() / std::declval<UT2>()) RetT;

    return static_cast<RetT>(ceil_div(val, rounding) * static_cast<UT2>(rounding));
}
/// \brief Converts C API float array to std::vector<float>
inline std::vector<float> float_arr_to_vector(const cldnn_float_arr& arr)
{
    std::vector<float> result(arr.size);
    for (size_t i = 0; i < arr.size; i++)
        result[i] = arr.data[i];
    return result;
}

/// \brief Converts C API uint16_t array to std::vector<uint16_t>
inline std::vector<uint16_t> uint16_t_arr_to_vector(const cldnn_uint16_t_arr& arr)
{
    std::vector<uint16_t> result(arr.size);
    for (size_t i = 0; i < arr.size; i++)
        result[i] = arr.data[i];
    return result;
}

/// \brief Converts C API uint8_t array to std::vector<uint8_t>
inline std::vector<uint8_t> uint8_t_arr_to_vector(const cldnn_uint8_t_arr& arr)
{
    std::vector<uint8_t> result(arr.size);
    for (size_t i = 0; i < arr.size; i++)
        result[i] = arr.data[i];
    return result;
}
/// \brief Converts std::vector<float> to C API float array
inline cldnn_float_arr float_vector_to_arr(const std::vector<float>& stor)
{
    return { stor.data(), stor.size() };
}

/// \brief Converts std::vector<uint16_t> to C API uint16_t array
inline cldnn_uint16_t_arr uint16_t_vector_to_arr(const std::vector<uint16_t>& stor)
{
    return { stor.data(), stor.size() };
}

/// \brief Converts std::vector<uint8_t> to C API uint8_t array
inline cldnn_uint8_t_arr uint8_t_vector_to_arr(const std::vector<uint8_t>& stor)
{
    return { stor.data(), stor.size() };
}

/// \brief Converts std::vector<tensor> to C API tensor array
inline cldnn_tensor_arr tensor_vector_to_arr(const std::vector<cldnn_tensor>& stor)
{
    return cldnn_tensor_arr{ stor.data(), stor.size() };
}

/// \brief Converts C API tensor array to std::vector of C API tensors
inline std::vector<cldnn_tensor> tensor_arr_to_cldnn_vector(const cldnn_tensor_arr& arr)
{
    std::vector<cldnn_tensor> result(arr.size);
    for (size_t i = 0; i < arr.size; i++)
        result[i] = arr.data[i];
    return result;
}