From: Jaeyun Jung
Date: Mon, 10 Dec 2018 14:50:12 +0000 (+0900)
Subject: [Transform/Orc] remove unnecessary memcpy
X-Git-Tag: v0.1.0~72
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6f36f26b09f99f2507c0a22abf7238e186ed1391;p=platform%2Fupstream%2Fnnstreamer.git

[Transform/Orc] remove unnecessary memcpy

Related issue: #975
In arith mode, copying input array to output is unnecessary.
To fix this, add source in orc functions and set the data ptr for each operator.

Signed-off-by: Jaeyun Jung
---

diff --git a/gst/tensor_transform/tensor_transform.c b/gst/tensor_transform/tensor_transform.c
index 92a61a9..d0dd98f 100644
--- a/gst/tensor_transform/tensor_transform.c
+++ b/gst/tensor_transform/tensor_transform.c
@@ -338,42 +338,43 @@ gst_tensor_transform_get_mode (const gchar * str) } \ } while (0) -#define orc_operator_func(i,n,v,opfunc) do { \ +#define orc_operator_func(i,o,n,v,opfunc) do { \ switch ((v)->type) { \ - case _NNS_INT32: opfunc (s32) ((gpointer) i, (v)->data._int32_t, n); break; \ - case _NNS_UINT32: opfunc (u32) ((gpointer) i, (v)->data._uint32_t, n); break; \ - case _NNS_INT16: opfunc (s16) ((gpointer) i, (v)->data._int16_t, n); break; \ - case _NNS_UINT16: opfunc (u16) ((gpointer) i, (v)->data._uint16_t, n); break; \ - case _NNS_INT8: opfunc (s8) ((gpointer) i, (v)->data._int8_t, n); break; \ - case _NNS_UINT8: opfunc (u8) ((gpointer) i, (v)->data._uint8_t, n); break; \ - case _NNS_FLOAT64: opfunc (f64) ((gpointer) i, (v)->data._double, n); break; \ - case _NNS_FLOAT32: opfunc (f32) ((gpointer) i, (v)->data._float, n); break; \ + case _NNS_INT32: opfunc (s32) ((gpointer) o, (gpointer) i, (v)->data._int32_t, n); break; \ + case _NNS_UINT32: opfunc (u32) ((gpointer) o, (gpointer) i, (v)->data._uint32_t, n); break; \ + case _NNS_INT16: opfunc (s16) ((gpointer) o, (gpointer) i, (v)->data._int16_t, n); break; \ + case _NNS_UINT16: opfunc (u16) ((gpointer) o, (gpointer) i, (v)->data._uint16_t, n); break; \ + case _NNS_INT8: opfunc (s8) 
((gpointer) o, (gpointer) i, (v)->data._int8_t, n); break; \ + case _NNS_UINT8: opfunc (u8) ((gpointer) o, (gpointer) i, (v)->data._uint8_t, n); break; \ + case _NNS_FLOAT64: opfunc (f64) ((gpointer) o, (gpointer) i, (v)->data._double, n); break; \ + case _NNS_FLOAT32: opfunc (f32) ((gpointer) o, (gpointer) i, (v)->data._float, n); break; \ default: GST_ERROR_OBJECT (filter, "Unsupported type %d", (v)->type); g_assert (0); break; \ } \ } while (0) -#define orc_operator_div_loop(i,n,val,typename) do { \ +#define orc_operator_div_loop(i,o,n,val,typename) do { \ gsize idx; \ - typename *data_array = (typename *) (i); \ + typename *data_in = (typename *) (i); \ + typename *data_out = (typename *) (o); \ for (idx = 0; idx < (n); ++idx) { \ - data_array[idx] /= (val); \ + data_out[idx] = data_in[idx] / (val); \ } \ } while (0) -#define orc_operator(i,n,v,op) do { \ +#define orc_operator(i,o,n,v,op) do { \ switch (op) { \ - case GTT_OP_ADD: orc_operator_func (i, n, v, orc_func_add); break; \ - case GTT_OP_MUL: orc_operator_func (i, n, v, orc_func_mul); break; \ + case GTT_OP_ADD: orc_operator_func (i, o, n, v, orc_func_add); break; \ + case GTT_OP_MUL: orc_operator_func (i, o, n, v, orc_func_mul); break; \ case GTT_OP_DIV: \ switch ((v)->type) { \ - case _NNS_INT32: orc_operator_div_loop (i, n, (v)->data._int32_t, int32_t); break; \ - case _NNS_UINT32: orc_operator_div_loop (i, n, (v)->data._uint32_t, uint32_t); break; \ - case _NNS_INT16: orc_operator_div_loop (i, n, (v)->data._int16_t, int16_t); break; \ - case _NNS_UINT16: orc_operator_div_loop (i, n, (v)->data._uint16_t, uint16_t); break; \ - case _NNS_INT8: orc_operator_div_loop (i, n, (v)->data._int8_t, int8_t); break; \ - case _NNS_UINT8: orc_operator_div_loop (i, n, (v)->data._uint8_t, uint8_t); break; \ - case _NNS_FLOAT64: orc_func_div (f64) ((gpointer) i, (v)->data._double, n); break; \ - case _NNS_FLOAT32: orc_func_div (f32) ((gpointer) i, (v)->data._float, n); break; \ + case _NNS_INT32: orc_operator_div_loop 
(i, o, n, (v)->data._int32_t, int32_t); break; \ + case _NNS_UINT32: orc_operator_div_loop (i, o, n, (v)->data._uint32_t, uint32_t); break; \ + case _NNS_INT16: orc_operator_div_loop (i, o, n, (v)->data._int16_t, int16_t); break; \ + case _NNS_UINT16: orc_operator_div_loop (i, o, n, (v)->data._uint16_t, uint16_t); break; \ + case _NNS_INT8: orc_operator_div_loop (i, o, n, (v)->data._int8_t, int8_t); break; \ + case _NNS_UINT8: orc_operator_div_loop (i, o, n, (v)->data._uint8_t, uint8_t); break; \ + case _NNS_FLOAT64: orc_func_div (f64) ((gpointer) o, (gpointer) i, (v)->data._double, n); break; \ + case _NNS_FLOAT32: orc_func_div (f32) ((gpointer) o, (gpointer) i, (v)->data._float, n); break; \ default: GST_ERROR_OBJECT (filter, "Unsupported type %d", (v)->type); g_assert (0); break; \ } \ break; \ @@ -1107,13 +1108,21 @@ gst_tensor_transform_arithmetic (GstTensorTransform * filter, #ifdef HAVE_ORC if (orc_supported (filter)) { + uint8_t *srcptr = (uint8_t *) inptr; + walk = filter->operators; + op_s = (tensor_transform_operator_s *) walk->data; - /** - * Typecast should be called at the first. - * Do the typecast. If in/out type is same, this will copy the input array to output. - */ - orc_typecast (inptr, outptr, num, in_tensor_type, out_tensor_type); + if (op_s->op == GTT_OP_TYPECAST) { + /** + * Typecast should be called at the first. + * Do the typecast. If in/out type is same, this will copy the input array to output. 
+ */ + orc_typecast (inptr, outptr, num, in_tensor_type, out_tensor_type); + srcptr = outptr; + + walk = g_slist_next (walk); + } while (walk) { op_s = (tensor_transform_operator_s *) walk->data; @@ -1121,7 +1130,8 @@ gst_tensor_transform_arithmetic (GstTensorTransform * filter, if (op_s->op != GTT_OP_TYPECAST) { gst_tensor_transform_typecast_value (filter, &op_s->value, out_tensor_type); - orc_operator (outptr, num, &op_s->value, op_s->op); + orc_operator (srcptr, outptr, num, &op_s->value, op_s->op); + srcptr = outptr; } walk = g_slist_next (walk); diff --git a/gst/tensor_transform/transform-orc.orc b/gst/tensor_transform/transform-orc.orc index 34ec82c..d248f26 100644 --- a/gst/tensor_transform/transform-orc.orc +++ b/gst/tensor_transform/transform-orc.orc @@ -1,16 +1,18 @@ .function nns_orc_add_c_s8 .dest 1 d1 int8_t +.source 1 s1 int8_t .param 1 p1 int8_t -addssb d1, d1, p1 +addssb d1, s1, p1 .function nns_orc_mul_c_s8 .dest 1 d1 int8_t +.source 1 s1 int8_t .param 1 p1 int8_t .temp 2 t1 -mulsbw t1, d1, p1 +mulsbw t1, s1, p1 convssswb d1, t1 @@ -84,17 +86,19 @@ convld d1, t2 .function nns_orc_add_c_u8 .dest 1 d1 uint8_t +.source 1 s1 uint8_t .param 1 p1 uint8_t -addusb d1, d1, p1 +addusb d1, s1, p1 .function nns_orc_mul_c_u8 .dest 1 d1 uint8_t +.source 1 s1 uint8_t .param 1 p1 uint8_t .temp 2 t1 -mulubw t1, d1, p1 +mulubw t1, s1, p1 convuuswb d1, t1 @@ -168,17 +172,19 @@ convld d1, t2 .function nns_orc_add_c_s16 .dest 2 d1 int16_t +.source 2 s1 int16_t .param 2 p1 int16_t -addssw d1, d1, p1 +addssw d1, s1, p1 .function nns_orc_mul_c_s16 .dest 2 d1 int16_t +.source 2 s1 int16_t .param 2 p1 int16_t .temp 4 t1 -mulswl t1, d1, p1 +mulswl t1, s1, p1 convssslw d1, t1 @@ -245,17 +251,19 @@ convld d1, t1 .function nns_orc_add_c_u16 .dest 2 d1 uint16_t +.source 2 s1 uint16_t .param 2 p1 uint16_t -addusw d1, d1, p1 +addusw d1, s1, p1 .function nns_orc_mul_c_u16 .dest 2 d1 uint16_t +.source 2 s1 uint16_t .param 2 p1 uint16_t .temp 4 t1 -muluwl t1, d1, p1 +muluwl t1, s1, 
p1 convuuslw d1, t1 @@ -321,17 +329,19 @@ convld d1, t1 .function nns_orc_add_c_s32 .dest 4 d1 int32_t +.source 4 s1 int32_t .param 4 p1 int32_t -addssl d1, d1, p1 +addssl d1, s1, p1 .function nns_orc_mul_c_s32 .dest 4 d1 int32_t +.source 4 s1 int32_t .param 4 p1 int32_t .temp 8 t1 -mulslq t1, d1, p1 +mulslq t1, s1, p1 convsssql d1, t1 @@ -398,17 +408,19 @@ convld d1, s1 .function nns_orc_add_c_u32 .dest 4 d1 uint32_t +.source 4 s1 uint32_t .param 4 p1 uint32_t -addusl d1, d1, p1 +addusl d1, s1, p1 .function nns_orc_mul_c_u32 .dest 4 d1 uint32_t +.source 4 s1 uint32_t .param 4 p1 uint32_t .temp 8 t1 -mululq t1, d1, p1 +mululq t1, s1, p1 convuusql d1, t1 @@ -474,23 +486,26 @@ convld d1, s1 .function nns_orc_add_c_f32 .dest 4 d1 float +.source 4 s1 float .floatparam 4 p1 float -addf d1, d1, p1 +addf d1, s1, p1 .function nns_orc_mul_c_f32 .dest 4 d1 float +.source 4 s1 float .floatparam 4 p1 float -mulf d1, d1, p1 +mulf d1, s1, p1 .function nns_orc_div_c_f32 .dest 4 d1 float +.source 4 s1 float .floatparam 4 p1 float -divf d1, d1, p1 +divf d1, s1, p1 .function nns_orc_conv_f32_to_s8 @@ -563,23 +578,26 @@ convfd d1, s1 .function nns_orc_add_c_f64 .dest 8 d1 double +.source 8 s1 double .doubleparam 8 p1 double -addd d1, d1, p1 +addd d1, s1, p1 .function nns_orc_mul_c_f64 .dest 8 d1 double +.source 8 s1 double .doubleparam 8 p1 double -muld d1, d1, p1 +muld d1, s1, p1 .function nns_orc_div_c_f64 .dest 8 d1 double +.source 8 s1 double .doubleparam 8 p1 double -divd d1, d1, p1 +divd d1, s1, p1 .function nns_orc_conv_f64_to_s8 diff --git a/tests/nnstreamer_plugins/unittest_plugins.cpp b/tests/nnstreamer_plugins/unittest_plugins.cpp index 8b92a18..750b33f 100644 --- a/tests/nnstreamer_plugins/unittest_plugins.cpp +++ b/tests/nnstreamer_plugins/unittest_plugins.cpp @@ -1791,7 +1791,7 @@ TEST (test_tensor_transform, orc_add) data_s8[i] = i - 1; } - nns_orc_add_c_s8 (data_s8, -20, array_size); + nns_orc_add_c_s8 (data_s8, data_s8, -20, array_size); for (i = 0; i < array_size; 
i++) { EXPECT_EQ (data_s8[i], i - 1 - 20); @@ -1801,7 +1801,7 @@ TEST (test_tensor_transform, orc_add) data_s8[i] = i + 1; } - nns_orc_add_c_s8 (data_s8, 20, array_size); + nns_orc_add_c_s8 (data_s8, data_s8, 20, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s8[i], i + 1 + 20); @@ -1814,7 +1814,7 @@ TEST (test_tensor_transform, orc_add) data_u8[i] = i + 1; } - nns_orc_add_c_u8 (data_u8, 3, array_size); + nns_orc_add_c_u8 (data_u8, data_u8, 3, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u8[i], i + 1 + 3); @@ -1827,7 +1827,7 @@ TEST (test_tensor_transform, orc_add) data_s16[i] = i - 1; } - nns_orc_add_c_s16 (data_s16, -16, array_size); + nns_orc_add_c_s16 (data_s16, data_s16, -16, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s16[i], i - 1 - 16); @@ -1837,7 +1837,7 @@ TEST (test_tensor_transform, orc_add) data_s16[i] = i + 1; } - nns_orc_add_c_s16 (data_s16, 16, array_size); + nns_orc_add_c_s16 (data_s16, data_s16, 16, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s16[i], i + 1 + 16); @@ -1850,7 +1850,7 @@ TEST (test_tensor_transform, orc_add) data_u16[i] = i + 1; } - nns_orc_add_c_u16 (data_u16, 17, array_size); + nns_orc_add_c_u16 (data_u16, data_u16, 17, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u16[i], i + 1 + 17); @@ -1863,7 +1863,7 @@ TEST (test_tensor_transform, orc_add) data_s32[i] = i + 1; } - nns_orc_add_c_s32 (data_s32, -32, array_size); + nns_orc_add_c_s32 (data_s32, data_s32, -32, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s32[i], i + 1 - 32); @@ -1873,7 +1873,7 @@ TEST (test_tensor_transform, orc_add) data_s32[i] = i + 1; } - nns_orc_add_c_s32 (data_s32, 32, array_size); + nns_orc_add_c_s32 (data_s32, data_s32, 32, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s32[i], i + 1 + 32); @@ -1886,7 +1886,7 @@ TEST (test_tensor_transform, orc_add) data_u32[i] = i + 1; } - nns_orc_add_c_u32 (data_u32, 33, array_size); + 
nns_orc_add_c_u32 (data_u32, data_u32, 33, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u32[i], i + 1 + 33); @@ -1899,7 +1899,7 @@ TEST (test_tensor_transform, orc_add) data_f32[i] = i - .1; } - nns_orc_add_c_f32 (data_f32, -10.2, array_size); + nns_orc_add_c_f32 (data_f32, data_f32, -10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], i - .1 - 10.2); @@ -1909,7 +1909,7 @@ TEST (test_tensor_transform, orc_add) data_f32[i] = i + .1; } - nns_orc_add_c_f32 (data_f32, 10.2, array_size); + nns_orc_add_c_f32 (data_f32, data_f32, 10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], i + .1 + 10.2); @@ -1922,7 +1922,7 @@ TEST (test_tensor_transform, orc_add) data_f64[i] = i - .1; } - nns_orc_add_c_f64 (data_f64, -20.5, array_size); + nns_orc_add_c_f64 (data_f64, data_f64, -20.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], i - .1 - 20.5); @@ -1932,7 +1932,7 @@ TEST (test_tensor_transform, orc_add) data_f64[i] = i + .2; } - nns_orc_add_c_f64 (data_f64, 20.5, array_size); + nns_orc_add_c_f64 (data_f64, data_f64, 20.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], i + .2 + 20.5); @@ -1954,7 +1954,7 @@ TEST (test_tensor_transform, orc_mul) data_s8[i] = i + 1; } - nns_orc_mul_c_s8 (data_s8, -3, array_size); + nns_orc_mul_c_s8 (data_s8, data_s8, -3, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s8[i], (i + 1) * (-3)); @@ -1964,7 +1964,7 @@ TEST (test_tensor_transform, orc_mul) data_s8[i] = i + 1; } - nns_orc_mul_c_s8 (data_s8, 5, array_size); + nns_orc_mul_c_s8 (data_s8, data_s8, 5, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s8[i], (i + 1) * 5); @@ -1977,7 +1977,7 @@ TEST (test_tensor_transform, orc_mul) data_u8[i] = i + 1; } - nns_orc_mul_c_u8 (data_u8, 3, array_size); + nns_orc_mul_c_u8 (data_u8, data_u8, 3, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u8[i], (i + 1) * 3); 
@@ -1990,7 +1990,7 @@ TEST (test_tensor_transform, orc_mul) data_s16[i] = i + 1; } - nns_orc_mul_c_s16 (data_s16, -16, array_size); + nns_orc_mul_c_s16 (data_s16, data_s16, -16, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s16[i], (i + 1) * (-16)); @@ -2000,7 +2000,7 @@ TEST (test_tensor_transform, orc_mul) data_s16[i] = i + 1; } - nns_orc_mul_c_s16 (data_s16, 16, array_size); + nns_orc_mul_c_s16 (data_s16, data_s16, 16, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s16[i], (i + 1) * 16); @@ -2013,7 +2013,7 @@ TEST (test_tensor_transform, orc_mul) data_u16[i] = i + 1; } - nns_orc_mul_c_u16 (data_u16, 17, array_size); + nns_orc_mul_c_u16 (data_u16, data_u16, 17, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u16[i], (i + 1) * 17); @@ -2026,7 +2026,7 @@ TEST (test_tensor_transform, orc_mul) data_s32[i] = i + 1; } - nns_orc_mul_c_s32 (data_s32, -32, array_size); + nns_orc_mul_c_s32 (data_s32, data_s32, -32, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s32[i], (i + 1) * (-32)); @@ -2036,7 +2036,7 @@ TEST (test_tensor_transform, orc_mul) data_s32[i] = i + 1; } - nns_orc_mul_c_s32 (data_s32, 32, array_size); + nns_orc_mul_c_s32 (data_s32, data_s32, 32, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_s32[i], (i + 1) * 32); @@ -2049,7 +2049,7 @@ TEST (test_tensor_transform, orc_mul) data_u32[i] = i + 1; } - nns_orc_mul_c_u32 (data_u32, 33, array_size); + nns_orc_mul_c_u32 (data_u32, data_u32, 33, array_size); for (i = 0; i < array_size; i++) { EXPECT_EQ (data_u32[i], (i + 1) * 33); @@ -2062,7 +2062,7 @@ TEST (test_tensor_transform, orc_mul) data_f32[i] = i + 1 - .1; } - nns_orc_mul_c_f32 (data_f32, -10.2, array_size); + nns_orc_mul_c_f32 (data_f32, data_f32, -10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], (i + 1 - .1) * (-10.2)); @@ -2072,7 +2072,7 @@ TEST (test_tensor_transform, orc_mul) data_f32[i] = i + .1; } - nns_orc_mul_c_f32 (data_f32, 10.2, 
array_size); + nns_orc_mul_c_f32 (data_f32, data_f32, 10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], (i + .1) * 10.2); @@ -2085,7 +2085,7 @@ TEST (test_tensor_transform, orc_mul) data_f64[i] = i + 1 - .1; } - nns_orc_mul_c_f64 (data_f64, -20.5, array_size); + nns_orc_mul_c_f64 (data_f64, data_f64, -20.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], (i + 1 - .1) * (-20.5)); @@ -2095,7 +2095,7 @@ TEST (test_tensor_transform, orc_mul) data_f64[i] = i + .2; } - nns_orc_mul_c_f64 (data_f64, 20.5, array_size); + nns_orc_mul_c_f64 (data_f64, data_f64, 20.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], (i + .2) * 20.5); @@ -2117,7 +2117,7 @@ TEST (test_tensor_transform, orc_div) data_f32[i] = i + 1 - .1; } - nns_orc_div_c_f32 (data_f32, -2.2, array_size); + nns_orc_div_c_f32 (data_f32, data_f32, -2.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], (i + 1 - .1) / (-2.2)); @@ -2127,7 +2127,7 @@ TEST (test_tensor_transform, orc_div) data_f32[i] = i + 10.1; } - nns_orc_div_c_f32 (data_f32, 10.2, array_size); + nns_orc_div_c_f32 (data_f32, data_f32, 10.2, array_size); for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (data_f32[i], (i + 10.1) / 10.2); @@ -2140,7 +2140,7 @@ TEST (test_tensor_transform, orc_div) data_f64[i] = i + 1 - .1; } - nns_orc_div_c_f64 (data_f64, -10.5, array_size); + nns_orc_div_c_f64 (data_f64, data_f64, -10.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], (i + 1 - .1) / (-10.5)); @@ -2150,7 +2150,7 @@ TEST (test_tensor_transform, orc_div) data_f64[i] = i + .2; } - nns_orc_div_c_f64 (data_f64, 5.5, array_size); + nns_orc_div_c_f64 (data_f64, data_f64, 5.5, array_size); for (i = 0; i < array_size; i++) { EXPECT_DOUBLE_EQ (data_f64[i], (i + .2) / 5.5); @@ -2874,7 +2874,7 @@ TEST (test_tensor_transform, orc_performance) /* orc add u8 */ start_ts = g_get_real_time (); - nns_orc_add_c_u8 
(data_u8, 2, array_size); + nns_orc_add_c_u8 (data_u8, data_u8, 2, array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -2896,7 +2896,7 @@ TEST (test_tensor_transform, orc_performance) /* orc mul u8 */ start_ts = g_get_real_time (); - nns_orc_mul_c_u8 (data_u8, 2, array_size); + nns_orc_mul_c_u8 (data_u8, data_u8, 2, array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -2940,7 +2940,7 @@ TEST (test_tensor_transform, orc_performance) /* orc div f32 */ start_ts = g_get_real_time (); - nns_orc_div_c_f32 (data_float, 2., array_size); + nns_orc_div_c_f32 (data_float, data_float, 2., array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -2962,7 +2962,7 @@ TEST (test_tensor_transform, orc_performance) /* orc mul f32 */ start_ts = g_get_real_time (); - nns_orc_mul_c_f32 (data_float, 2., array_size); + nns_orc_mul_c_f32 (data_float, data_float, 2., array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -2984,7 +2984,7 @@ TEST (test_tensor_transform, orc_performance) /* orc add f32 */ start_ts = g_get_real_time (); - nns_orc_add_c_f32 (data_float, 2., array_size); + nns_orc_add_c_f32 (data_float, data_float, 2., array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; @@ -3012,8 +3012,8 @@ TEST (test_tensor_transform, orc_performance) /* orc typecast - add - mul */ start_ts = g_get_real_time (); nns_orc_conv_u8_to_f32 (data_float, data_u8, array_size); - nns_orc_add_c_f32 (data_float, .2, array_size); - nns_orc_mul_c_f32 (data_float, 1.2, array_size); + nns_orc_add_c_f32 (data_float, data_float, .2, array_size); + nns_orc_mul_c_f32 (data_float, data_float, 1.2, array_size); stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts;