ext/nnstreamer/tensor_filter/tensor_filter_ncnn.cc

   1 /* SPDX-License-Identifier: LGPL-2.1-only */
   2 /**
   3  * @file    tensor_filter_ncnn.cc
   4  * @date    18 Dec 2023
   5  * @brief   NNStreamer tensor-filter sub-plugin for Tencent ncnn
   6  * @author  Sungbin Jo <goranmoomin@daum.net>
   7  * @author  SangLyul Cho <chosanglyul@gmail.com>
   8  * @author  Kijun Shin <sharaelong.shin@gmail.com>
   9  * @see     http://github.com/nnstreamer/nnstreamer
  10  * @bug     No known bugs.
  11  *
  12  * This is the ncnn plugin for tensor_filter.
  13  *
  14  * @details Usage examples
  15  *  Case 1: image classification by squeezenet
  16  *  Case 2: object detection by mobilenetv2-ssdlite
  17  *
  18  * @note Special considerations on properties:
  19  *  input, inputtype, output, outputtype:
  20  *    It is essential to set these four options correctly.
  21  *    For assistance in configuring the shape and type,
  22  *    please refer to the examples provided.
  23  *
  24  *  accelerator:
  25  *    Enable Vulkan acceleration by setting accelerator=true:gpu.
  26  *    This option is applicable if your device is equipped
  27  *    with any Vulkan-acceleratable processor.
  28  *
  29  *  custom:
 *    Entries are separated by ','.
 *    Each entry has the property_key:value format.
 *    There must be no spaces.
  33  *
  34  *    Supported custom properties:
  35  *      use_yolo_decoder (optional, default=false)
 *        Enable this option by setting custom=use_yolo_decoder:true if your model
  37  *        includes a Yolov3DetectionOutput layer or yolo-related output layers,
  38  *        especially when dealing with variable output sizes (num_detection, 6).
  39  *        In such cases, you must also configure
  40  *        output=(5+num_labels, max_detection, 1) and outputtype=float32.
  41  *        To calculate the max_detection for an input image of size (w, h),
  42  *        use the formula: (w/32)*(h/32) + (w/16)*(h/16) + (w/8)*(h/8)*3.
  43  *        See also: https://github.com/nnstreamer/nnstreamer/blob/main/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.c#L1194
  44  */
  45
  46 #include <functional>
  47 #include <glib.h>
  48 #include <nnstreamer_cppplugin_api_filter.hh>
  49 #include <nnstreamer_log.h>
  50 #include <nnstreamer_plugin_api.h>
  51 #include <nnstreamer_util.h>
  52 #include <tensor_common.h>
  53 #include <thread>
  54
  55 #include <ncnn/net.h>
  56
  57 namespace nnstreamer
  58 {
  59 namespace tensorfilter_ncnn
  60 {
  61
  62 G_BEGIN_DECLS
  63
  64 void init_filter_ncnn (void) __attribute__ ((constructor));
  65 void fini_filter_ncnn (void) __attribute__ ((destructor));
  66
  67 G_END_DECLS
  68
  69 /**
  70  * @brief Class for ncnn subplugin.
  71  */
  72 class ncnn_subplugin final : public tensor_filter_subplugin
  73 {
  74   public:
  75   static void init_filter_ncnn (); /**< Dynamic library contstructor helper */
  76   static void fini_filter_ncnn (); /**< Dynamic library desctructor helper */
  77
  78   ncnn_subplugin ();
  79   ~ncnn_subplugin ();
  80
  81   /**< Implementations of ncnn tensor_filter_subplugin */
  82   tensor_filter_subplugin &getEmptyInstance ();
  83   void configure_instance (const GstTensorFilterProperties *prop);
  84   void invoke (const GstTensorMemory *input, GstTensorMemory *output);
  85   void getFrameworkInfo (GstTensorFilterFrameworkInfo &info);
  86   int getModelInfo (model_info_ops ops, GstTensorsInfo &in_info, GstTensorsInfo &out_info);
  87   int eventHandler (event_ops ops, GstTensorFilterFrameworkEventData &data);
  88
  89   private:
  90   bool empty_model; /**< Empty (not initialized) model flag */
  91   static const GstTensorFilterFrameworkInfo info; /**< Framework info */
  92   GstTensorsInfo inputInfo; /**< Input tensors metadata */
  93   GstTensorsInfo outputInfo; /**< Output tensors metadata */
  94   bool use_yolo_decoder; /**< Yolo decoder flag to fix output dimension */
  95
  96   static ncnn_subplugin *registeredRepresentation;
  97
  98   ncnn::Net net; /**< Model symbol */
  99   std::vector<ncnn::Mat> input_mats; /**< Matrices of inputs */
 100   std::vector<ncnn::Mat> output_mats; /**< Matrices of outputs */
 101
 102   void parseCustomProperties (const GstTensorFilterProperties *prop);
 103   static void input_thread (ncnn::Extractor &ex, const int idx,
 104       const ncnn::Mat &in, const void *input_data, const uint32_t num_bytes);
 105   static void extract_thread (ncnn::Extractor &ex, const int idx,
 106       ncnn::Mat &out, void *output_data, const uint32_t num_bytes);
 107 };
 108
 109 /**
 110  * @brief Describe framework information.
 111  */
 112 const GstTensorFilterFrameworkInfo ncnn_subplugin::info = { .name = "ncnn",
 113   .allow_in_place = FALSE,
 114   .allocate_in_invoke = FALSE,
 115   .run_without_model = FALSE,
 116   .verify_model_path = TRUE,
 117   .hw_list = (const accl_hw[]){ ACCL_CPU, ACCL_GPU },
 118   .num_hw = 2,
 119   .accl_auto = ACCL_CPU,
 120   .accl_default = ACCL_CPU,
 121   .statistics = nullptr };
 122
 123 /**
 124  * @brief Construct a new ncnn subplugin::ncnn subplugin object
 125  */
 126 ncnn_subplugin::ncnn_subplugin () : tensor_filter_subplugin ()
 127 {
 128   gst_tensors_info_init (std::addressof (inputInfo));
 129   gst_tensors_info_init (std::addressof (outputInfo));
 130 }
 131
 132 /**
 133  * @brief Destroy the ncnn subplugin::ncnn subplugin object
 134  */
 135 ncnn_subplugin::~ncnn_subplugin ()
 136 {
 137   gst_tensors_info_free (std::addressof (inputInfo));
 138   gst_tensors_info_free (std::addressof (outputInfo));
 139
 140   if (empty_model)
 141     return;
 142
 143   empty_model = true;
 144 }
 145
 146 /**
 147  * @brief Method to get empty instance of ncnn subplugin.
 148  */
 149 tensor_filter_subplugin &
 150 ncnn_subplugin::getEmptyInstance ()
 151 {
 152   return *(new ncnn_subplugin ());
 153 }
 154
 155 /**
 156  * @brief Configure the instance of the ncnn subplugin.
 157  */
 158 void
 159 ncnn_subplugin::configure_instance (const GstTensorFilterProperties *prop)
 160 {
 161   // get input / output info from properties
 162   gst_tensors_info_copy (std::addressof (inputInfo), std::addressof (prop->input_meta));
 163   gst_tensors_info_copy (std::addressof (outputInfo), std::addressof (prop->output_meta));
 164
 165   // check number of model files
 166   if (prop->num_models > 2 || prop->num_models <= 0) {
 167     throw std::invalid_argument (std::string ("Number of model files must be 1 or 2;")
 168                                  + " Multiple model is not supported.");
 169   }
 170
 171   // try to parse custom properties of the ncnn_subplugin
 172   try {
 173     // parse custom properties
 174     parseCustomProperties (prop);
 175   } catch (const std::invalid_argument &e) {
 176     throw std::invalid_argument (
 177         "Failed to parse custom property : " + std::string (e.what ()));
 178   }
 179
 180   // decide use vulkan acceleration
 181   if (std::find (prop->hw_list, prop->hw_list + prop->num_hw, ACCL_GPU)
 182       != (prop->hw_list + prop->num_hw)) {
 183     net.opt.use_vulkan_compute = true;
 184     g_message ("accl = gpu\n");
 185   } else {
 186     net.opt.use_vulkan_compute = false;
 187   }
 188
 189   // load model files
 190   // ncnn returns nonzero value when an error occurs
 191   if (prop->num_models == 1) {
 192     if (net.load_param_bin (prop->model_files[0]))
 193       throw std::invalid_argument (
 194           "Failed to open the model file " + std::string (prop->model_files[0]));
 195   } else {
 196     if (net.load_param (prop->model_files[0]))
 197       throw std::invalid_argument (
 198           "Failed to open the param file " + std::string (prop->model_files[0]));
 199     if (net.load_model (prop->model_files[1]))
 200       throw std::invalid_argument (
 201           "Failed to open the bin file " + std::string (prop->model_files[1]));
 202   }
 203
 204   // get input layers from the ncnn network
 205   const std::vector<int> &input_indexes = net.input_indexes ();
 206   input_mats.clear ();
 207   if (inputInfo.num_tensors != input_indexes.size ())
 208     throw std::invalid_argument (
 209         std::string ("Wrong number of input matrices")
 210         + ": Found in argument = " + std::to_string (inputInfo.num_tensors)
 211         + ", Found in model file = " + std::to_string (input_indexes.size ()));
 212
 213   // init input matrices
 214   for (guint i = 0; i < inputInfo.num_tensors; i++) {
 215     // get dimensions of the input matrix from inputInfo
 216     const uint32_t *dim = gst_tensors_info_get_nth_info (&inputInfo, i)->dimension;
 217     std::vector<int> shape;
 218     while (*dim)
 219       shape.push_back (*dim++);
 220
 221     // make ncnn matrix object
 222     ncnn::Mat in;
 223     switch (shape.size ()) {
 224       case 1:
 225         in = ncnn::Mat (shape[0]);
 226         break;
 227       case 2:
 228         in = ncnn::Mat (shape[0], shape[1]);
 229         break;
 230       case 3:
 231         in = ncnn::Mat (shape[0], shape[1], shape[2]);
 232         break;
 233       case 4:
 234         in = ncnn::Mat (shape[0], shape[1], shape[2], shape[3]);
 235         break;
 236       default:
 237         throw std::invalid_argument ("ncnn subplugin supports only up to 4 ranks and does not support input tensors of "
 238                                      + std::to_string (shape.size ()) + " dimensions.");
 239     }
 240     input_mats.push_back (in);
 241   }
 242
 243   // get output layers from the ncnn network
 244   const std::vector<int> &output_indexes = net.output_indexes ();
 245   output_mats.clear ();
 246   if (outputInfo.num_tensors != output_indexes.size ())
 247     throw std::invalid_argument (
 248         std::string ("Wrong number of output matrices")
 249         + ": Found in argument = " + std::to_string (outputInfo.num_tensors)
 250         + ", Found in model file = " + std::to_string (output_indexes.size ()));
 251
 252   // init output matrices
 253   output_mats.resize (outputInfo.num_tensors);
 254
 255   empty_model = false;
 256 }
 257
 258 /**
 259  * @brief Invoke ncnn model and get the inference result.
 260  */
 261 void
 262 ncnn_subplugin::invoke (const GstTensorMemory *input, GstTensorMemory *output)
 263 {
 264   if (empty_model)
 265     throw std::runtime_error (
 266         "Model is empty: the ncnn instance is not configured and "
 267         "its \"invoke\" method is called. This may be an internal bug of "
 268         "nnstreamer or ncnn-subplugin unless if you have directly accessed "
 269         "ncnn-subplugin.");
 270
 271   // make extractor instance for each inference
 272   ncnn::Extractor ex = net.create_extractor ();
 273
 274   // get input layer indices
 275   std::vector<std::thread> input_thrs;
 276   const std::vector<int> &input_indexes = net.input_indexes ();
 277
 278   // get input from input tensor and push to the network
 279   const char *input_data = (const char *) input->data;
 280   for (guint i = 0; i < inputInfo.num_tensors; i++) {
 281     ncnn::Mat &in = input_mats.at (i);
 282     const uint32_t num_bytes = (in.elembits () / 8) * in.total ();
 283     input_thrs.emplace_back (input_thread, std::ref (ex), input_indexes.at (i),
 284         std::ref (in), input_data, num_bytes);
 285     input_data += num_bytes;
 286   }
 287
 288   // join threads
 289   for (std::thread &thr : input_thrs)
 290     thr.join ();
 291
 292   // get output layer indices
 293   std::vector<std::thread> output_thrs;
 294   const std::vector<int> &output_indexes = net.output_indexes ();
 295
 296   if (use_yolo_decoder) {
 297     // get output and store to ncnn matrix
 298     for (guint i = 0; i < outputInfo.num_tensors; i++) {
 299       ncnn::Mat &out = output_mats.at (i);
 300       output_thrs.emplace_back (extract_thread, std::ref (ex),
 301           output_indexes.at (i), std::ref (out), nullptr, 0);
 302     }
 303
 304     // memset output to zero and hide latency by multithreading
 305     memset (output->data, 0, output->size);
 306
 307     // join threads
 308     for (std::thread &thr : output_thrs)
 309       thr.join ();
 310
 311     // write detection-box infos to the output tensor
 312     for (guint i = 0; i < outputInfo.num_tensors; i++) {
 313       ncnn::Mat &out = output_mats.at (i);
 314       const int label_count
 315           = gst_tensors_info_get_nth_info (&outputInfo, i)->dimension[0];
 316       float *output_data = (float *) output->data;
 317       for (int j = 0; j < out.h; j++) {
 318         float *values = out.row (j);
 319         values[2] = fmaxf (fminf (values[2], 1.0), 0.0);
 320         values[3] = fmaxf (fminf (values[3], 1.0), 0.0);
 321         values[4] = fmaxf (fminf (values[4], 1.0), 0.0);
 322         values[5] = fmaxf (fminf (values[5], 1.0), 0.0);
 323
 324         output_data[0] = (values[2] + values[4]) / 2;
 325         output_data[1] = (values[3] + values[5]) / 2;
 326         output_data[2] = values[4] - values[2];
 327         output_data[3] = values[5] - values[3];
 328         output_data[4] = values[1];
 329         output_data[5 + (int) values[0]] = 1;
 330         output_data += label_count;
 331       }
 332     }
 333   } else {
 334     // get output and store to the output tensor
 335     char *output_data = (char *) output->data;
 336     for (guint i = 0; i < outputInfo.num_tensors; i++) {
 337       ncnn::Mat &out = output_mats.at (i);
 338       const uint32_t num_bytes = (out.elembits () / 8) * out.total ();
 339       output_thrs.emplace_back (extract_thread, std::ref (ex),
 340           output_indexes.at (i), std::ref (out), output_data, num_bytes);
 341       output_data += num_bytes;
 342     }
 343
 344     // join threads
 345     for (std::thread &thr : output_thrs)
 346       thr.join ();
 347   }
 348 }
 349
 350 /**
 351  * @brief Get ncnn frameworks info
 352  */
 353 void
 354 ncnn_subplugin::getFrameworkInfo (GstTensorFilterFrameworkInfo &info)
 355 {
 356   info = ncnn_subplugin::info;
 357 }
 358
 359 /**
 360  * @brief Get ncnn model information
 361  */
 362 int
 363 ncnn_subplugin::getModelInfo (
 364     model_info_ops ops, GstTensorsInfo &in_info, GstTensorsInfo &out_info)
 365 {
 366   switch (ops) {
 367     case GET_IN_OUT_INFO:
 368       gst_tensors_info_copy (std::addressof (in_info), std::addressof (inputInfo));
 369       gst_tensors_info_copy (std::addressof (out_info), std::addressof (outputInfo));
 370       break;
 371     case SET_INPUT_INFO:
 372     default:
 373       return -ENOENT;
 374   }
 375   return 0;
 376 }
 377
 378 /**
 379  * @brief Method to handle the event
 380  */
 381 int
 382 ncnn_subplugin::eventHandler (event_ops ops, GstTensorFilterFrameworkEventData &data)
 383 {
 384   UNUSED (ops);
 385   UNUSED (data);
 386   return -ENOENT;
 387 }
 388
 389 /**
 390  * @brief Parse custom prop and set instance options accordingly.
 391  */
 392 void
 393 ncnn_subplugin::parseCustomProperties (const GstTensorFilterProperties *prop)
 394 {
 395   using uniq_g_strv = std::unique_ptr<gchar *, std::function<void (gchar **)>>;
 396   const char *custom_props = prop->custom_properties;
 397
 398   // set default values
 399   use_yolo_decoder = false;
 400
 401   if (custom_props) {
 402     // split with , to parse options
 403     uniq_g_strv options (g_strsplit (custom_props, ",", -1), g_strfreev);
 404     guint len = g_strv_length (options.get ());
 405
 406     for (guint i = 0; i < len; i++) {
 407       // split with = to parse single option
 408       uniq_g_strv option (g_strsplit (options.get ()[i], ":", -1), g_strfreev);
 409
 410       // we only have key=value form option
 411       if (g_strv_length (option.get ()) == 2) {
 412         g_strstrip (option.get ()[0]);
 413         g_strstrip (option.get ()[1]);
 414
 415         if (g_ascii_strcasecmp (option.get ()[0], "use_yolo_decoder") == 0) {
 416           // true or false (default) only
 417           if (g_ascii_strcasecmp (option.get ()[1], "true") == 0) {
 418             use_yolo_decoder = true;
 419           } else if (g_ascii_strcasecmp (option.get ()[1], "false") == 0) {
 420             use_yolo_decoder = false;
 421           } else {
 422             throw std::invalid_argument ("Invalid option for use_yolo_decoder: "
 423                                          + std::string (option.get ()[1]) + ".");
 424           }
 425         } else {
 426           throw std::invalid_argument (
 427               "Unsupported custom property: " + std::string (option.get ()[0]) + ".");
 428         }
 429       } else {
 430         throw std::invalid_argument (
 431             "Unsupported custom property: " + std::string (options.get ()[i]) + ".");
 432       }
 433     }
 434   }
 435 }
 436
 437 /**
 438  * @brief Worker function when inserting inputs to the input layer.
 439  */
 440 void
 441 ncnn_subplugin::input_thread (ncnn::Extractor &ex, const int idx,
 442     const ncnn::Mat &in, const void *input_data, const uint32_t num_bytes)
 443 {
 444   // copy from the input matrix
 445   memcpy (in.data, input_data, num_bytes);
 446
 447   // input to the network
 448   ex.input (idx, in);
 449 }
 450
 451 /**
 452  * @brief Worker function when getting result from the output layer.
 453  */
 454 void
 455 ncnn_subplugin::extract_thread (ncnn::Extractor &ex, const int idx,
 456     ncnn::Mat &out, void *output_data, const uint32_t num_bytes)
 457 {
 458   // output from the network
 459   ex.extract (idx, out);
 460
 461   // copy to the output matrix
 462   if (output_data)
 463     memcpy (output_data, out.data, num_bytes);
 464 }
 465
 466 ncnn_subplugin *ncnn_subplugin::registeredRepresentation = nullptr;
 467
 468 /**
 469  * @brief Initialize the object for runtime register
 470  */
 471 void
 472 ncnn_subplugin::init_filter_ncnn (void)
 473 {
 474   registeredRepresentation
 475       = tensor_filter_subplugin::register_subplugin<ncnn_subplugin> ();
 476 }
 477
 478 /**
 479  * @brief Destruct the subplugin
 480  */
 481 void
 482 ncnn_subplugin::fini_filter_ncnn (void)
 483 {
 484   assert (registeredRepresentation != nullptr);
 485   tensor_filter_subplugin::unregister_subplugin (registeredRepresentation);
 486 }
 487
 488 /**
 489  * @brief initializer
 490  */
 491 void
 492 init_filter_ncnn ()
 493 {
 494   ncnn_subplugin::init_filter_ncnn ();
 495 }
 496
 497 /**
 498  * @brief finalizer
 499  */
 500 void
 501 fini_filter_ncnn ()
 502 {
 503   ncnn_subplugin::fini_filter_ncnn ();
 504 }
 505
 506 } // namespace tensorfilter_ncnn
 507 } /* namespace nnstreamer */