From: Jaeyun Date: Mon, 15 Apr 2019 11:17:03 +0000 (+0900) Subject: [Transform/Orc] data ptr in orc function X-Git-Tag: accepted/tizen/unified/20190425.014439~18 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=acc07879341f70350119f1d099a4607554223bf2;p=platform%2Fupstream%2Fnnstreamer.git [Transform/Orc] data ptr in orc function For the add, mul, and div operators in the orc functions, use a single array pointer to handle each operator. Orc-generated functions take __restrict__ pointers, so passing pointers to overlapping memory regions may cause an error. To avoid this, pass a single array pointer to handle each operator. Signed-off-by: Jaeyun Jung --- diff --git a/gst/nnstreamer/tensor_transform/tensor_transform.c b/gst/nnstreamer/tensor_transform/tensor_transform.c index a0164ef..f0af5d1 100644 --- a/gst/nnstreamer/tensor_transform/tensor_transform.c +++ b/gst/nnstreamer/tensor_transform/tensor_transform.c @@ -360,43 +360,42 @@ gst_tensor_transform_get_stand_mode (const gchar * str) } \ } while (0) -#define orc_operator_func(i,o,n,v,opfunc) do { \ switch ((v)->type) { \ - case _NNS_INT32: opfunc (s32) ((gpointer) o, (gpointer) i, (v)->data._int32_t, n); break; \ - case _NNS_UINT32: opfunc (u32) ((gpointer) o, (gpointer) i, (v)->data._uint32_t, n); break; \ - case _NNS_INT16: opfunc (s16) ((gpointer) o, (gpointer) i, (v)->data._int16_t, n); break; \ - case _NNS_UINT16: opfunc (u16) ((gpointer) o, (gpointer) i, (v)->data._uint16_t, n); break; \ - case _NNS_INT8: opfunc (s8) ((gpointer) o, (gpointer) i, (v)->data._int8_t, n); break; \ - case _NNS_UINT8: opfunc (u8) ((gpointer) o, (gpointer) i, (v)->data._uint8_t, n); break; \ - case _NNS_FLOAT64: opfunc (f64) ((gpointer) o, (gpointer) i, (v)->data._double, n); break; \ - case _NNS_FLOAT32: opfunc (f32) ((gpointer) o, (gpointer) i, (v)->data._float, n); break; \ + case _NNS_INT32: opfunc (s32) ((gpointer) i, (v)->data._int32_t, n); break; \ + case _NNS_UINT32: opfunc (u32) ((gpointer) i, (v)->data._uint32_t, 
n); break; \ + case _NNS_INT16: opfunc (s16) ((gpointer) i, (v)->data._int16_t, n); break; \ + case _NNS_UINT16: opfunc (u16) ((gpointer) i, (v)->data._uint16_t, n); break; \ + case _NNS_INT8: opfunc (s8) ((gpointer) i, (v)->data._int8_t, n); break; \ + case _NNS_UINT8: opfunc (u8) ((gpointer) i, (v)->data._uint8_t, n); break; \ + case _NNS_FLOAT64: opfunc (f64) ((gpointer) i, (v)->data._double, n); break; \ + case _NNS_FLOAT32: opfunc (f32) ((gpointer) i, (v)->data._float, n); break; \ default: GST_ERROR_OBJECT (filter, "Unsupported type %d", (v)->type); g_assert (0); break; \ } \ } while (0) -#define orc_operator_div_loop(i,o,n,val,typename) do { \ +#define orc_operator_div_loop(i,n,val,typename) do { \ gsize idx; \ typename *data_in = (typename *) (i); \ - typename *data_out = (typename *) (o); \ for (idx = 0; idx < (n); ++idx) { \ - data_out[idx] = data_in[idx] / (val); \ + data_in[idx] = data_in[idx] / (val); \ } \ } while (0) -#define orc_operator(i,o,n,v,op) do { \ +#define orc_operator(i,n,v,op) do { \ switch (op) { \ - case GTT_OP_ADD: orc_operator_func (i, o, n, v, orc_func_add); break; \ - case GTT_OP_MUL: orc_operator_func (i, o, n, v, orc_func_mul); break; \ + case GTT_OP_ADD: orc_operator_func (i, n, v, orc_func_add); break; \ + case GTT_OP_MUL: orc_operator_func (i, n, v, orc_func_mul); break; \ case GTT_OP_DIV: \ switch ((v)->type) { \ - case _NNS_INT32: orc_operator_div_loop (i, o, n, (v)->data._int32_t, int32_t); break; \ - case _NNS_UINT32: orc_operator_div_loop (i, o, n, (v)->data._uint32_t, uint32_t); break; \ - case _NNS_INT16: orc_operator_div_loop (i, o, n, (v)->data._int16_t, int16_t); break; \ - case _NNS_UINT16: orc_operator_div_loop (i, o, n, (v)->data._uint16_t, uint16_t); break; \ - case _NNS_INT8: orc_operator_div_loop (i, o, n, (v)->data._int8_t, int8_t); break; \ - case _NNS_UINT8: orc_operator_div_loop (i, o, n, (v)->data._uint8_t, uint8_t); break; \ - case _NNS_FLOAT64: orc_func_div (f64) ((gpointer) o, (gpointer) i, 
(v)->data._double, n); break; \ - case _NNS_FLOAT32: orc_func_div (f32) ((gpointer) o, (gpointer) i, (v)->data._float, n); break; \ + case _NNS_INT32: orc_operator_div_loop (i, n, (v)->data._int32_t, int32_t); break; \ + case _NNS_UINT32: orc_operator_div_loop (i, n, (v)->data._uint32_t, uint32_t); break; \ + case _NNS_INT16: orc_operator_div_loop (i, n, (v)->data._int16_t, int16_t); break; \ + case _NNS_UINT16: orc_operator_div_loop (i, n, (v)->data._uint16_t, uint16_t); break; \ + case _NNS_INT8: orc_operator_div_loop (i, n, (v)->data._int8_t, int8_t); break; \ + case _NNS_UINT8: orc_operator_div_loop (i, n, (v)->data._uint8_t, uint8_t); break; \ + case _NNS_FLOAT64: orc_func_div (f64) ((gpointer) i, (v)->data._double, n); break; \ + case _NNS_FLOAT32: orc_func_div (f32) ((gpointer) i, (v)->data._float, n); break; \ default: GST_ERROR_OBJECT (filter, "Unsupported type %d", (v)->type); g_assert (0); break; \ } \ break; \ @@ -1156,21 +1155,13 @@ gst_tensor_transform_arithmetic (GstTensorTransform * filter, #ifdef HAVE_ORC if (orc_supported (filter)) { - uint8_t *srcptr = (uint8_t *) inptr; - walk = filter->operators; - op_s = (tensor_transform_operator_s *) walk->data; - if (op_s->op == GTT_OP_TYPECAST) { - /** - * Typecast should be called at the first. - * Do the typecast. If in/out type is same, this will copy the input array to output. - */ - orc_typecast (inptr, outptr, num, in_tensor_type, out_tensor_type); - srcptr = outptr; - - walk = g_slist_next (walk); - } + /** + * Typecast should be called at the first. + * Do the typecast. If in/out type is same, this will copy the input array to output. 
+ */ + orc_typecast (inptr, outptr, num, in_tensor_type, out_tensor_type); while (walk) { op_s = (tensor_transform_operator_s *) walk->data; @@ -1178,8 +1169,7 @@ gst_tensor_transform_arithmetic (GstTensorTransform * filter, if (op_s->op != GTT_OP_TYPECAST) { gst_tensor_transform_typecast_value (filter, &op_s->value, out_tensor_type); - orc_operator (srcptr, outptr, num, &op_s->value, op_s->op); - srcptr = outptr; + orc_operator (outptr, num, &op_s->value, op_s->op); } walk = g_slist_next (walk); diff --git a/gst/nnstreamer/tensor_transform/transform-orc.orc b/gst/nnstreamer/tensor_transform/transform-orc.orc index d248f26..34ec82c 100644 --- a/gst/nnstreamer/tensor_transform/transform-orc.orc +++ b/gst/nnstreamer/tensor_transform/transform-orc.orc @@ -1,18 +1,16 @@ .function nns_orc_add_c_s8 .dest 1 d1 int8_t -.source 1 s1 int8_t .param 1 p1 int8_t -addssb d1, s1, p1 +addssb d1, d1, p1 .function nns_orc_mul_c_s8 .dest 1 d1 int8_t -.source 1 s1 int8_t .param 1 p1 int8_t .temp 2 t1 -mulsbw t1, s1, p1 +mulsbw t1, d1, p1 convssswb d1, t1 @@ -86,19 +84,17 @@ convld d1, t2 .function nns_orc_add_c_u8 .dest 1 d1 uint8_t -.source 1 s1 uint8_t .param 1 p1 uint8_t -addusb d1, s1, p1 +addusb d1, d1, p1 .function nns_orc_mul_c_u8 .dest 1 d1 uint8_t -.source 1 s1 uint8_t .param 1 p1 uint8_t .temp 2 t1 -mulubw t1, s1, p1 +mulubw t1, d1, p1 convuuswb d1, t1 @@ -172,19 +168,17 @@ convld d1, t2 .function nns_orc_add_c_s16 .dest 2 d1 int16_t -.source 2 s1 int16_t .param 2 p1 int16_t -addssw d1, s1, p1 +addssw d1, d1, p1 .function nns_orc_mul_c_s16 .dest 2 d1 int16_t -.source 2 s1 int16_t .param 2 p1 int16_t .temp 4 t1 -mulswl t1, s1, p1 +mulswl t1, d1, p1 convssslw d1, t1 @@ -251,19 +245,17 @@ convld d1, t1 .function nns_orc_add_c_u16 .dest 2 d1 uint16_t -.source 2 s1 uint16_t .param 2 p1 uint16_t -addusw d1, s1, p1 +addusw d1, d1, p1 .function nns_orc_mul_c_u16 .dest 2 d1 uint16_t -.source 2 s1 uint16_t .param 2 p1 uint16_t .temp 4 t1 -muluwl t1, s1, p1 +muluwl t1, d1, p1 convuuslw 
d1, t1 @@ -329,19 +321,17 @@ convld d1, t1 .function nns_orc_add_c_s32 .dest 4 d1 int32_t -.source 4 s1 int32_t .param 4 p1 int32_t -addssl d1, s1, p1 +addssl d1, d1, p1 .function nns_orc_mul_c_s32 .dest 4 d1 int32_t -.source 4 s1 int32_t .param 4 p1 int32_t .temp 8 t1 -mulslq t1, s1, p1 +mulslq t1, d1, p1 convsssql d1, t1 @@ -408,19 +398,17 @@ convld d1, s1 .function nns_orc_add_c_u32 .dest 4 d1 uint32_t -.source 4 s1 uint32_t .param 4 p1 uint32_t -addusl d1, s1, p1 +addusl d1, d1, p1 .function nns_orc_mul_c_u32 .dest 4 d1 uint32_t -.source 4 s1 uint32_t .param 4 p1 uint32_t .temp 8 t1 -mululq t1, s1, p1 +mululq t1, d1, p1 convuusql d1, t1 @@ -486,26 +474,23 @@ convld d1, s1 .function nns_orc_add_c_f32 .dest 4 d1 float -.source 4 s1 float .floatparam 4 p1 float -addf d1, s1, p1 +addf d1, d1, p1 .function nns_orc_mul_c_f32 .dest 4 d1 float -.source 4 s1 float .floatparam 4 p1 float -mulf d1, s1, p1 +mulf d1, d1, p1 .function nns_orc_div_c_f32 .dest 4 d1 float -.source 4 s1 float .floatparam 4 p1 float -divf d1, s1, p1 +divf d1, d1, p1 .function nns_orc_conv_f32_to_s8 @@ -578,26 +563,23 @@ convfd d1, s1 .function nns_orc_add_c_f64 .dest 8 d1 double -.source 8 s1 double .doubleparam 8 p1 double -addd d1, s1, p1 +addd d1, d1, p1 .function nns_orc_mul_c_f64 .dest 8 d1 double -.source 8 s1 double .doubleparam 8 p1 double -muld d1, s1, p1 +muld d1, d1, p1 .function nns_orc_div_c_f64 .dest 8 d1 double -.source 8 s1 double .doubleparam 8 p1 double -divd d1, s1, p1 +divd d1, d1, p1 .function nns_orc_conv_f64_to_s8 diff --git a/tests/nnstreamer_plugins/unittest_plugins.cpp b/tests/nnstreamer_plugins/unittest_plugins.cpp index f7c7912..4347aa7 100644 --- a/tests/nnstreamer_plugins/unittest_plugins.cpp +++ b/tests/nnstreamer_plugins/unittest_plugins.cpp @@ -1598,7 +1598,7 @@ TEST (test_tensor_transform, orc_add) data_s8[i] = i - 1; } - nns_orc_add_c_s8 (data_s8, data_s8, -20, array_size); + nns_orc_add_c_s8 (data_s8, -20, array_size); for (i = 0; i < array_size; i++) { 
EXPECT_EQ (data_s8[i], i - 1 - 20); @@ -1608,7 +1608,7 @@ TEST (test_tensor_transform, orc_add) data_s8[i] = i + 1; } - nns_orc_add_c_s8 (data_s8, data_s8, 20, array_size); + nns_orc_add_c_s8 (data_s8, 20, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s8[i], i + 1 + 20); @@ -1621,7 +1621,7 @@ TEST (test_tensor_transform, orc_add) data_u8[i] = i + 1; } - nns_orc_add_c_u8 (data_u8, data_u8, 3, array_size); + nns_orc_add_c_u8 (data_u8, 3, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u8[i], i + 1 + 3); @@ -1634,7 +1634,7 @@ TEST (test_tensor_transform, orc_add) data_s16[i] = i - 1; } - nns_orc_add_c_s16 (data_s16, data_s16, -16, array_size); + nns_orc_add_c_s16 (data_s16, -16, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s16[i], i - 1 - 16); @@ -1644,7 +1644,7 @@ TEST (test_tensor_transform, orc_add) data_s16[i] = i + 1; } - nns_orc_add_c_s16 (data_s16, data_s16, 16, array_size); + nns_orc_add_c_s16 (data_s16, 16, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s16[i], i + 1 + 16); @@ -1657,7 +1657,7 @@ TEST (test_tensor_transform, orc_add) data_u16[i] = i + 1; } - nns_orc_add_c_u16 (data_u16, data_u16, 17, array_size); + nns_orc_add_c_u16 (data_u16, 17, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u16[i], i + 1 + 17); @@ -1670,7 +1670,7 @@ TEST (test_tensor_transform, orc_add) data_s32[i] = i + 1; } - nns_orc_add_c_s32 (data_s32, data_s32, -32, array_size); + nns_orc_add_c_s32 (data_s32, -32, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s32[i], i + 1 - 32); @@ -1680,7 +1680,7 @@ TEST (test_tensor_transform, orc_add) data_s32[i] = i + 1; } - nns_orc_add_c_s32 (data_s32, data_s32, 32, array_size); + nns_orc_add_c_s32 (data_s32, 32, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s32[i], i + 1 + 32); @@ -1693,7 +1693,7 @@ TEST (test_tensor_transform, orc_add) data_u32[i] = i + 1; } - nns_orc_add_c_u32 (data_u32, data_u32, 33, array_size); + 
nns_orc_add_c_u32 (data_u32, 33, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u32[i], i + 1 + 33); @@ -1706,7 +1706,7 @@ TEST (test_tensor_transform, orc_add) data_f32[i] = i - .1; } - nns_orc_add_c_f32 (data_f32, data_f32, -10.2, array_size); + nns_orc_add_c_f32 (data_f32, -10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], i - .1 - 10.2); @@ -1716,7 +1716,7 @@ TEST (test_tensor_transform, orc_add) data_f32[i] = i + .1; } - nns_orc_add_c_f32 (data_f32, data_f32, 10.2, array_size); + nns_orc_add_c_f32 (data_f32, 10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], i + .1 + 10.2); @@ -1729,7 +1729,7 @@ TEST (test_tensor_transform, orc_add) data_f64[i] = i - .1; } - nns_orc_add_c_f64 (data_f64, data_f64, -20.5, array_size); + nns_orc_add_c_f64 (data_f64, -20.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], i - .1 - 20.5); @@ -1739,7 +1739,7 @@ TEST (test_tensor_transform, orc_add) data_f64[i] = i + .2; } - nns_orc_add_c_f64 (data_f64, data_f64, 20.5, array_size); + nns_orc_add_c_f64 (data_f64, 20.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], i + .2 + 20.5); @@ -1761,7 +1761,7 @@ TEST (test_tensor_transform, orc_mul) data_s8[i] = i + 1; } - nns_orc_mul_c_s8 (data_s8, data_s8, -3, array_size); + nns_orc_mul_c_s8 (data_s8, -3, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s8[i], (i + 1) * (-3)); @@ -1771,7 +1771,7 @@ TEST (test_tensor_transform, orc_mul) data_s8[i] = i + 1; } - nns_orc_mul_c_s8 (data_s8, data_s8, 5, array_size); + nns_orc_mul_c_s8 (data_s8, 5, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s8[i], (i + 1) * 5); @@ -1784,7 +1784,7 @@ TEST (test_tensor_transform, orc_mul) data_u8[i] = i + 1; } - nns_orc_mul_c_u8 (data_u8, data_u8, 3, array_size); + nns_orc_mul_c_u8 (data_u8, 3, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u8[i], (i + 1) * 3); @@ -1797,7 
+1797,7 @@ TEST (test_tensor_transform, orc_mul) data_s16[i] = i + 1; } - nns_orc_mul_c_s16 (data_s16, data_s16, -16, array_size); + nns_orc_mul_c_s16 (data_s16, -16, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s16[i], (i + 1) * (-16)); @@ -1807,7 +1807,7 @@ TEST (test_tensor_transform, orc_mul) data_s16[i] = i + 1; } - nns_orc_mul_c_s16 (data_s16, data_s16, 16, array_size); + nns_orc_mul_c_s16 (data_s16, 16, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s16[i], (i + 1) * 16); @@ -1820,7 +1820,7 @@ TEST (test_tensor_transform, orc_mul) data_u16[i] = i + 1; } - nns_orc_mul_c_u16 (data_u16, data_u16, 17, array_size); + nns_orc_mul_c_u16 (data_u16, 17, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u16[i], (i + 1) * 17); @@ -1833,7 +1833,7 @@ TEST (test_tensor_transform, orc_mul) data_s32[i] = i + 1; } - nns_orc_mul_c_s32 (data_s32, data_s32, -32, array_size); + nns_orc_mul_c_s32 (data_s32, -32, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s32[i], (i + 1) * (-32)); @@ -1843,7 +1843,7 @@ TEST (test_tensor_transform, orc_mul) data_s32[i] = i + 1; } - nns_orc_mul_c_s32 (data_s32, data_s32, 32, array_size); + nns_orc_mul_c_s32 (data_s32, 32, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s32[i], (i + 1) * 32); @@ -1856,7 +1856,7 @@ TEST (test_tensor_transform, orc_mul) data_u32[i] = i + 1; } - nns_orc_mul_c_u32 (data_u32, data_u32, 33, array_size); + nns_orc_mul_c_u32 (data_u32, 33, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u32[i], (i + 1) * 33); @@ -1869,7 +1869,7 @@ TEST (test_tensor_transform, orc_mul) data_f32[i] = i + 1 - .1; } - nns_orc_mul_c_f32 (data_f32, data_f32, -10.2, array_size); + nns_orc_mul_c_f32 (data_f32, -10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], (i + 1 - .1) * (-10.2)); @@ -1879,7 +1879,7 @@ TEST (test_tensor_transform, orc_mul) data_f32[i] = i + .1; } - nns_orc_mul_c_f32 (data_f32, data_f32, 10.2, 
array_size); + nns_orc_mul_c_f32 (data_f32, 10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], (i + .1) * 10.2); @@ -1892,7 +1892,7 @@ TEST (test_tensor_transform, orc_mul) data_f64[i] = i + 1 - .1; } - nns_orc_mul_c_f64 (data_f64, data_f64, -20.5, array_size); + nns_orc_mul_c_f64 (data_f64, -20.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], (i + 1 - .1) * (-20.5)); @@ -1902,7 +1902,7 @@ TEST (test_tensor_transform, orc_mul) data_f64[i] = i + .2; } - nns_orc_mul_c_f64 (data_f64, data_f64, 20.5, array_size); + nns_orc_mul_c_f64 (data_f64, 20.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], (i + .2) * 20.5); @@ -1924,7 +1924,7 @@ TEST (test_tensor_transform, orc_div) data_f32[i] = i + 1 - .1; } - nns_orc_div_c_f32 (data_f32, data_f32, -2.2, array_size); + nns_orc_div_c_f32 (data_f32, -2.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], (i + 1 - .1) / (-2.2)); @@ -1934,7 +1934,7 @@ TEST (test_tensor_transform, orc_div) data_f32[i] = i + 10.1; } - nns_orc_div_c_f32 (data_f32, data_f32, 10.2, array_size); + nns_orc_div_c_f32 (data_f32, 10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], (i + 10.1) / 10.2); @@ -1947,7 +1947,7 @@ TEST (test_tensor_transform, orc_div) data_f64[i] = i + 1 - .1; } - nns_orc_div_c_f64 (data_f64, data_f64, -10.5, array_size); + nns_orc_div_c_f64 (data_f64, -10.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], (i + 1 - .1) / (-10.5)); @@ -1957,7 +1957,7 @@ TEST (test_tensor_transform, orc_div) data_f64[i] = i + .2; } - nns_orc_div_c_f64 (data_f64, data_f64, 5.5, array_size); + nns_orc_div_c_f64 (data_f64, 5.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], (i + .2) / 5.5); @@ -2681,7 +2681,7 @@ TEST (test_tensor_transform, orc_performance) /* orc add u8 */ start_ts = g_get_real_time (); - nns_orc_add_c_u8 (data_u8, 
data_u8, 2, array_size); + nns_orc_add_c_u8 (data_u8, 2, array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -2703,7 +2703,7 @@ TEST (test_tensor_transform, orc_performance) /* orc mul u8 */ start_ts = g_get_real_time (); - nns_orc_mul_c_u8 (data_u8, data_u8, 2, array_size); + nns_orc_mul_c_u8 (data_u8, 2, array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -2747,7 +2747,7 @@ TEST (test_tensor_transform, orc_performance) /* orc div f32 */ start_ts = g_get_real_time (); - nns_orc_div_c_f32 (data_float, data_float, 2., array_size); + nns_orc_div_c_f32 (data_float, 2., array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -2769,7 +2769,7 @@ TEST (test_tensor_transform, orc_performance) /* orc mul f32 */ start_ts = g_get_real_time (); - nns_orc_mul_c_f32 (data_float, data_float, 2., array_size); + nns_orc_mul_c_f32 (data_float, 2., array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -2791,7 +2791,7 @@ TEST (test_tensor_transform, orc_performance) /* orc add f32 */ start_ts = g_get_real_time (); - nns_orc_add_c_f32 (data_float, data_float, 2., array_size); + nns_orc_add_c_f32 (data_float, 2., array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -2819,8 +2819,8 @@ TEST (test_tensor_transform, orc_performance) /* orc typecast - add - mul */ start_ts = g_get_real_time (); nns_orc_conv_u8_to_f32 (data_float, data_u8, array_size); - nns_orc_add_c_f32 (data_float, data_float, .2, array_size); - nns_orc_mul_c_f32 (data_float, data_float, 1.2, array_size); + nns_orc_add_c_f32 (data_float, .2, array_size); + nns_orc_mul_c_f32 (data_float, 1.2, array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts;