1 /* SPDX-License-Identifier: LGPL-2.1-only */
3 * @file tensor_filter_ncnn.cc
5 * @brief NNStreamer tensor-filter sub-plugin for Tencent ncnn
6 * @author Sungbin Jo <goranmoomin@daum.net>
7 * @author SangLyul Cho <chosanglyul@gmail.com>
8 * @author Kijun Shin <sharaelong.shin@gmail.com>
9 * @see http://github.com/nnstreamer/nnstreamer
12 * This is the ncnn plugin for tensor_filter.
14 * @details Usage examples
15 * Case 1: image classification by squeezenet
16 * Case 2: object detection by mobilenetv2-ssdlite
18 * @note Special considerations on properties:
19 * input, inputtype, output, outputtype:
20 * It is essential to set these four options correctly.
21 * For assistance in configuring the shape and type,
22 * please refer to the examples provided.
25 * Enable Vulkan acceleration by setting accelerator=true:gpu.
26 * This option is applicable if your device is equipped
27 * with any Vulkan-acceleratable processor.
 * Entries are separated by ','.
 * Each entry has the property_key:value format.
 * There must be no spaces.
34 * Supported custom properties:
35 * use_yolo_decoder (optional, default=false)
36 * Enable this option by setting use_yolo_decoder=true if your model
37 * includes a Yolov3DetectionOutput layer or yolo-related output layers,
38 * especially when dealing with variable output sizes (num_detection, 6).
39 * In such cases, you must also configure
40 * output=(5+num_labels, max_detection, 1) and outputtype=float32.
41 * To calculate the max_detection for an input image of size (w, h),
42 * use the formula: (w/32)*(h/32) + (w/16)*(h/16) + (w/8)*(h/8)*3.
43 * See also: https://github.com/nnstreamer/nnstreamer/blob/main/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.c#L1194
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

#include <nnstreamer_cppplugin_api_filter.hh>
#include <nnstreamer_log.h>
#include <nnstreamer_plugin_api.h>
#include <nnstreamer_util.h>
#include <tensor_common.h>

#include <ncnn/net.h>
59 namespace tensorfilter_ncnn
/* Shared-object lifecycle hooks: run automatically on dlopen/dlclose. */
void init_filter_ncnn (void) __attribute__ ((constructor));
void fini_filter_ncnn (void) __attribute__ ((destructor));
70 * @brief Class for ncnn subplugin.
72 class ncnn_subplugin final : public tensor_filter_subplugin
75 static void init_filter_ncnn (); /**< Dynamic library contstructor helper */
76 static void fini_filter_ncnn (); /**< Dynamic library desctructor helper */
81 /**< Implementations of ncnn tensor_filter_subplugin */
82 tensor_filter_subplugin &getEmptyInstance ();
83 void configure_instance (const GstTensorFilterProperties *prop);
84 void invoke (const GstTensorMemory *input, GstTensorMemory *output);
85 void getFrameworkInfo (GstTensorFilterFrameworkInfo &info);
86 int getModelInfo (model_info_ops ops, GstTensorsInfo &in_info, GstTensorsInfo &out_info);
87 int eventHandler (event_ops ops, GstTensorFilterFrameworkEventData &data);
90 bool empty_model; /**< Empty (not initialized) model flag */
91 static const GstTensorFilterFrameworkInfo info; /**< Framework info */
92 GstTensorsInfo inputInfo; /**< Input tensors metadata */
93 GstTensorsInfo outputInfo; /**< Output tensors metadata */
94 bool use_yolo_decoder; /**< Yolo decoder flag to fix output dimension */
96 static ncnn_subplugin *registeredRepresentation;
98 ncnn::Net net; /**< Model symbol */
99 std::vector<ncnn::Mat> input_mats; /**< Matrices of inputs */
100 std::vector<ncnn::Mat> output_mats; /**< Matrices of outputs */
102 void parseCustomProperties (const GstTensorFilterProperties *prop);
103 static void input_thread (ncnn::Extractor &ex, const int idx,
104 const ncnn::Mat &in, const void *input_data, const uint32_t num_bytes);
105 static void extract_thread (ncnn::Extractor &ex, const int idx,
106 ncnn::Mat &out, void *output_data, const uint32_t num_bytes);
110 * @brief Describe framework information.
112 const GstTensorFilterFrameworkInfo ncnn_subplugin::info = { .name = "ncnn",
113 .allow_in_place = FALSE,
114 .allocate_in_invoke = FALSE,
115 .run_without_model = FALSE,
116 .verify_model_path = TRUE,
117 .hw_list = (const accl_hw[]){ ACCL_CPU, ACCL_GPU },
119 .accl_auto = ACCL_CPU,
120 .accl_default = ACCL_CPU,
121 .statistics = nullptr };
124 * @brief Construct a new ncnn subplugin::ncnn subplugin object
126 ncnn_subplugin::ncnn_subplugin () : tensor_filter_subplugin ()
128 gst_tensors_info_init (std::addressof (inputInfo));
129 gst_tensors_info_init (std::addressof (outputInfo));
133 * @brief Destroy the ncnn subplugin::ncnn subplugin object
135 ncnn_subplugin::~ncnn_subplugin ()
137 gst_tensors_info_free (std::addressof (inputInfo));
138 gst_tensors_info_free (std::addressof (outputInfo));
147 * @brief Method to get empty instance of ncnn subplugin.
149 tensor_filter_subplugin &
150 ncnn_subplugin::getEmptyInstance ()
152 return *(new ncnn_subplugin ());
156 * @brief Configure the instance of the ncnn subplugin.
159 ncnn_subplugin::configure_instance (const GstTensorFilterProperties *prop)
161 // get input / output info from properties
162 gst_tensors_info_copy (std::addressof (inputInfo), std::addressof (prop->input_meta));
163 gst_tensors_info_copy (std::addressof (outputInfo), std::addressof (prop->output_meta));
165 // check number of model files
166 if (prop->num_models > 2 || prop->num_models <= 0) {
167 throw std::invalid_argument (std::string ("Number of model files must be 1 or 2;")
168 + " Multiple model is not supported.");
171 // try to parse custom properties of the ncnn_subplugin
173 // parse custom properties
174 parseCustomProperties (prop);
175 } catch (const std::invalid_argument &e) {
176 throw std::invalid_argument (
177 "Failed to parse custom property : " + std::string (e.what ()));
180 // decide use vulkan acceleration
181 if (std::find (prop->hw_list, prop->hw_list + prop->num_hw, ACCL_GPU)
182 != (prop->hw_list + prop->num_hw)) {
183 net.opt.use_vulkan_compute = true;
184 g_message ("accl = gpu\n");
186 net.opt.use_vulkan_compute = false;
190 // ncnn returns nonzero value when an error occurs
191 if (prop->num_models == 1) {
192 if (net.load_param_bin (prop->model_files[0]))
193 throw std::invalid_argument (
194 "Failed to open the model file " + std::string (prop->model_files[0]));
196 if (net.load_param (prop->model_files[0]))
197 throw std::invalid_argument (
198 "Failed to open the param file " + std::string (prop->model_files[0]));
199 if (net.load_model (prop->model_files[1]))
200 throw std::invalid_argument (
201 "Failed to open the bin file " + std::string (prop->model_files[1]));
204 // get input layers from the ncnn network
205 const std::vector<int> &input_indexes = net.input_indexes ();
207 if (inputInfo.num_tensors != input_indexes.size ())
208 throw std::invalid_argument (
209 std::string ("Wrong number of input matrices")
210 + ": Found in argument = " + std::to_string (inputInfo.num_tensors)
211 + ", Found in model file = " + std::to_string (input_indexes.size ()));
213 // init input matrices
214 for (guint i = 0; i < inputInfo.num_tensors; i++) {
215 // get dimensions of the input matrix from inputInfo
216 const uint32_t *dim = gst_tensors_info_get_nth_info (&inputInfo, i)->dimension;
217 std::vector<int> shape;
219 shape.push_back (*dim++);
221 // make ncnn matrix object
223 switch (shape.size ()) {
225 in = ncnn::Mat (shape[0]);
228 in = ncnn::Mat (shape[0], shape[1]);
231 in = ncnn::Mat (shape[0], shape[1], shape[2]);
234 in = ncnn::Mat (shape[0], shape[1], shape[2], shape[3]);
237 throw std::invalid_argument ("ncnn subplugin supports only up to 4 ranks and does not support input tensors of "
238 + std::to_string (shape.size ()) + " dimensions.");
240 input_mats.push_back (in);
243 // get output layers from the ncnn network
244 const std::vector<int> &output_indexes = net.output_indexes ();
245 output_mats.clear ();
246 if (outputInfo.num_tensors != output_indexes.size ())
247 throw std::invalid_argument (
248 std::string ("Wrong number of output matrices")
249 + ": Found in argument = " + std::to_string (outputInfo.num_tensors)
250 + ", Found in model file = " + std::to_string (output_indexes.size ()));
252 // init output matrices
253 output_mats.resize (outputInfo.num_tensors);
259 * @brief Invoke ncnn model and get the inference result.
262 ncnn_subplugin::invoke (const GstTensorMemory *input, GstTensorMemory *output)
265 throw std::runtime_error (
266 "Model is empty: the ncnn instance is not configured and "
267 "its \"invoke\" method is called. This may be an internal bug of "
268 "nnstreamer or ncnn-subplugin unless if you have directly accessed "
271 // make extractor instance for each inference
272 ncnn::Extractor ex = net.create_extractor ();
274 // get input layer indices
275 std::vector<std::thread> input_thrs;
276 const std::vector<int> &input_indexes = net.input_indexes ();
278 // get input from input tensor and push to the network
279 const char *input_data = (const char *) input->data;
280 for (guint i = 0; i < inputInfo.num_tensors; i++) {
281 ncnn::Mat &in = input_mats.at (i);
282 const uint32_t num_bytes = (in.elembits () / 8) * in.total ();
283 input_thrs.emplace_back (input_thread, std::ref (ex), input_indexes.at (i),
284 std::ref (in), input_data, num_bytes);
285 input_data += num_bytes;
289 for (std::thread &thr : input_thrs)
292 // get output layer indices
293 std::vector<std::thread> output_thrs;
294 const std::vector<int> &output_indexes = net.output_indexes ();
296 if (use_yolo_decoder) {
297 // get output and store to ncnn matrix
298 for (guint i = 0; i < outputInfo.num_tensors; i++) {
299 ncnn::Mat &out = output_mats.at (i);
300 output_thrs.emplace_back (extract_thread, std::ref (ex),
301 output_indexes.at (i), std::ref (out), nullptr, 0);
304 // memset output to zero and hide latency by multithreading
305 memset (output->data, 0, output->size);
308 for (std::thread &thr : output_thrs)
311 // write detection-box infos to the output tensor
312 for (guint i = 0; i < outputInfo.num_tensors; i++) {
313 ncnn::Mat &out = output_mats.at (i);
314 const int label_count
315 = gst_tensors_info_get_nth_info (&outputInfo, i)->dimension[0];
316 float *output_data = (float *) output->data;
317 for (int j = 0; j < out.h; j++) {
318 float *values = out.row (j);
319 values[2] = fmaxf (fminf (values[2], 1.0), 0.0);
320 values[3] = fmaxf (fminf (values[3], 1.0), 0.0);
321 values[4] = fmaxf (fminf (values[4], 1.0), 0.0);
322 values[5] = fmaxf (fminf (values[5], 1.0), 0.0);
324 output_data[0] = (values[2] + values[4]) / 2;
325 output_data[1] = (values[3] + values[5]) / 2;
326 output_data[2] = values[4] - values[2];
327 output_data[3] = values[5] - values[3];
328 output_data[4] = values[1];
329 output_data[5 + (int) values[0]] = 1;
330 output_data += label_count;
334 // get output and store to the output tensor
335 char *output_data = (char *) output->data;
336 for (guint i = 0; i < outputInfo.num_tensors; i++) {
337 ncnn::Mat &out = output_mats.at (i);
338 const uint32_t num_bytes = (out.elembits () / 8) * out.total ();
339 output_thrs.emplace_back (extract_thread, std::ref (ex),
340 output_indexes.at (i), std::ref (out), output_data, num_bytes);
341 output_data += num_bytes;
345 for (std::thread &thr : output_thrs)
351 * @brief Get ncnn frameworks info
354 ncnn_subplugin::getFrameworkInfo (GstTensorFilterFrameworkInfo &info)
356 info = ncnn_subplugin::info;
360 * @brief Get ncnn model information
363 ncnn_subplugin::getModelInfo (
364 model_info_ops ops, GstTensorsInfo &in_info, GstTensorsInfo &out_info)
367 case GET_IN_OUT_INFO:
368 gst_tensors_info_copy (std::addressof (in_info), std::addressof (inputInfo));
369 gst_tensors_info_copy (std::addressof (out_info), std::addressof (outputInfo));
379 * @brief Method to handle the event
382 ncnn_subplugin::eventHandler (event_ops ops, GstTensorFilterFrameworkEventData &data)
390 * @brief Parse custom prop and set instance options accordingly.
393 ncnn_subplugin::parseCustomProperties (const GstTensorFilterProperties *prop)
395 using uniq_g_strv = std::unique_ptr<gchar *, std::function<void (gchar **)>>;
396 const char *custom_props = prop->custom_properties;
398 // set default values
399 use_yolo_decoder = false;
402 // split with , to parse options
403 uniq_g_strv options (g_strsplit (custom_props, ",", -1), g_strfreev);
404 guint len = g_strv_length (options.get ());
406 for (guint i = 0; i < len; i++) {
407 // split with = to parse single option
408 uniq_g_strv option (g_strsplit (options.get ()[i], ":", -1), g_strfreev);
410 // we only have key=value form option
411 if (g_strv_length (option.get ()) == 2) {
412 g_strstrip (option.get ()[0]);
413 g_strstrip (option.get ()[1]);
415 if (g_ascii_strcasecmp (option.get ()[0], "use_yolo_decoder") == 0) {
416 // true or false (default) only
417 if (g_ascii_strcasecmp (option.get ()[1], "true") == 0) {
418 use_yolo_decoder = true;
419 } else if (g_ascii_strcasecmp (option.get ()[1], "false") == 0) {
420 use_yolo_decoder = false;
422 throw std::invalid_argument ("Invalid option for use_yolo_decoder: "
423 + std::string (option.get ()[1]) + ".");
426 throw std::invalid_argument (
427 "Unsupported custom property: " + std::string (option.get ()[0]) + ".");
430 throw std::invalid_argument (
431 "Unsupported custom property: " + std::string (options.get ()[i]) + ".");
438 * @brief Worker function when inserting inputs to the input layer.
441 ncnn_subplugin::input_thread (ncnn::Extractor &ex, const int idx,
442 const ncnn::Mat &in, const void *input_data, const uint32_t num_bytes)
444 // copy from the input matrix
445 memcpy (in.data, input_data, num_bytes);
447 // input to the network
452 * @brief Worker function when getting result from the output layer.
455 ncnn_subplugin::extract_thread (ncnn::Extractor &ex, const int idx,
456 ncnn::Mat &out, void *output_data, const uint32_t num_bytes)
458 // output from the network
459 ex.extract (idx, out);
461 // copy to the output matrix
463 memcpy (output_data, out.data, num_bytes);
466 ncnn_subplugin *ncnn_subplugin::registeredRepresentation = nullptr;
469 * @brief Initialize the object for runtime register
472 ncnn_subplugin::init_filter_ncnn (void)
474 registeredRepresentation
475 = tensor_filter_subplugin::register_subplugin<ncnn_subplugin> ();
479 * @brief Destruct the subplugin
482 ncnn_subplugin::fini_filter_ncnn (void)
484 assert (registeredRepresentation != nullptr);
485 tensor_filter_subplugin::unregister_subplugin (registeredRepresentation);
494 ncnn_subplugin::init_filter_ncnn ();
503 ncnn_subplugin::fini_filter_ncnn ();
506 } // namespace tensorfilter_ncnn
507 } /* namespace nnstreamer */