From: Jaeyun Date: Wed, 5 Dec 2018 08:10:32 +0000 (+0900) Subject: [Orc] update functions for divide, typecast X-Git-Tag: v0.1.0~85 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=202fa557fb77b2381eac454727da5172ae3ba825;p=platform%2Fupstream%2Fnnstreamer.git [Orc] update functions for divide, typecast 1. add function _div (float and double only) 2. add typecast to same type (copy input array to output) 3. update opcode for typecast Signed-off-by: Jaeyun Jung --- diff --git a/gst/tensor_transform/transform-orc.orc b/gst/tensor_transform/transform-orc.orc index a0d5ecf..34ec82c 100644 --- a/gst/tensor_transform/transform-orc.orc +++ b/gst/tensor_transform/transform-orc.orc @@ -14,6 +14,13 @@ mulsbw t1, d1, p1 convssswb d1, t1 +.function nns_orc_conv_s8_to_s8 +.dest 1 d1 int8_t +.source 1 s1 int8_t + +copyb d1, s1 + + .function nns_orc_conv_s8_to_u8 .dest 1 d1 uint8_t .source 1 s1 int8_t @@ -98,6 +105,13 @@ convuuswb d1, t1 copyb d1, s1 +.function nns_orc_conv_u8_to_u8 +.dest 1 d1 uint8_t +.source 1 s1 uint8_t + +copyb d1, s1 + + .function nns_orc_conv_u8_to_s16 .dest 2 d1 int16_t .source 1 s1 uint8_t @@ -179,7 +193,14 @@ convssswb d1, s1 .dest 1 d1 uint8_t .source 2 s1 int16_t -convssswb d1, s1 +convwb d1, s1 + + +.function nns_orc_conv_s16_to_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t + +copyw d1, s1 .function nns_orc_conv_s16_to_u16 @@ -249,7 +270,7 @@ convssswb d1, s1 .dest 1 d1 uint8_t .source 2 s1 uint16_t -convssswb d1, s1 +convwb d1, s1 .function nns_orc_conv_u16_to_s16 @@ -259,6 +280,13 @@ convssswb d1, s1 copyw d1, s1 +.function nns_orc_conv_u16_to_u16 +.dest 2 d1 uint16_t +.source 2 s1 uint16_t + +copyw d1, s1 + + .function nns_orc_conv_u16_to_s32 .dest 4 d1 int32_t .source 2 s1 uint16_t @@ -321,8 +349,8 @@ convssswb d1, t1 .source 4 s1 int32_t .temp 2 t1 -convssslw t1, s1 -convssswb d1, t1 +convlw t1, s1 +convwb d1, t1 .function nns_orc_conv_s32_to_s16 @@ -340,6 +368,13 @@ convssslw d1, s1 convssslw d1, s1 +.function nns_orc_conv_s32_to_s32 +.dest 4 d1 int32_t +.source 4 s1 int32_t + +copyl d1, s1 + + .function nns_orc_conv_s32_to_u32 .dest 4 d1 uint32_t .source 4 s1 int32_t @@ -391,8 +426,8 @@ convssswb d1, t1 .source 4 s1 uint32_t .temp 2 t1 -convssslw t1, s1 -convssswb d1, t1 +convlw t1, s1 +convwb d1, t1 .function nns_orc_conv_u32_to_s16 @@ -416,6 +451,13 @@ convssslw d1, s1 copyl d1, s1 +.function nns_orc_conv_u32_to_u32 +.dest 4 d1 uint32_t +.source 4 s1 uint32_t + +copyl d1, s1 + + .function nns_orc_conv_u32_to_f32 .dest 4 d1 float .source 4 s1 uint32_t @@ -444,6 +486,13 @@ addf d1, d1, p1 mulf d1, d1, p1 +.function nns_orc_div_c_f32 +.dest 4 d1 float +.floatparam 4 p1 float + +divf d1, d1, p1 + + .function nns_orc_conv_f32_to_s8 .dest 1 d1 int8_t .source 4 s1 float @@ -462,8 +511,8 @@ convssswb d1, t2 .temp 2 t2 convfl t1, s1 -convssslw t2, t1 -convssswb d1, t2 +convlw t2, t1 +convwb d1, t2 .function nns_orc_conv_f32_to_s16 @@ -498,6 +547,13 @@ convfl d1, s1 convfl d1, s1 +.function nns_orc_conv_f32_to_f32 +.dest 4 d1 float +.source 4 s1 float + +copyl d1, s1 + + .function nns_orc_conv_f32_to_f64 .dest 8 d1 double .source 4 s1 float @@ -519,6 +575,13 @@ addd d1, d1, p1 muld d1, d1, p1 +.function nns_orc_div_c_f64 +.dest 8 d1 double +.doubleparam 8 p1 double + +divd d1, d1, p1 + + .function nns_orc_conv_f64_to_s8 .dest 1 d1 int8_t .source 8 s1 double @@ -537,8 +600,8 @@ convssswb d1, t2 .temp 2 t2 convdl t1, s1 -convssslw t2, t1 -convssswb d1, t2 +convlw t2, t1 +convwb d1, t2 .function nns_orc_conv_f64_to_s16 @@ -578,3 +641,10 @@ convdl d1, s1 .source 8 s1 double convdf d1, s1 + + +.function nns_orc_conv_f64_to_f64 +.dest 8 d1 double +.source 8 s1 double + +copyq d1, s1 diff --git a/tests/nnstreamer_plugins/unittest_plugins.cpp b/tests/nnstreamer_plugins/unittest_plugins.cpp index dc6a275..92fc699 100644 --- a/tests/nnstreamer_plugins/unittest_plugins.cpp +++ b/tests/nnstreamer_plugins/unittest_plugins.cpp @@ -858,6 +858,61 @@ TEST (test_tensor_transform, orc_mul) } /** + * @brief Test for tensor_transform orc functions (div constant value) + */ +TEST (test_tensor_transform, orc_div) +{ + const guint array_size = 10; + guint i; + + /* div constant f32 */ + float data_f32[array_size] = { 0, }; + + for (i = 0; i < array_size; i++) { + data_f32[i] = i + 1 - .1; + } + + nns_orc_div_c_f32 (data_f32, -2.2, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_FLOAT_EQ (data_f32[i], (i + 1 - .1) / (-2.2)); + } + + for (i = 0; i < array_size; i++) { + data_f32[i] = i + 10.1; + } + + nns_orc_div_c_f32 (data_f32, 10.2, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_FLOAT_EQ (data_f32[i], (i + 10.1) / 10.2); + } + + /* div constant f64 */ + double data_f64[array_size] = { 0, }; + + for (i = 0; i < array_size; i++) { + data_f64[i] = i + 1 - .1; + } + + nns_orc_div_c_f64 (data_f64, -10.5, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_DOUBLE_EQ (data_f64[i], (i + 1 - .1) / (-10.5)); + } + + for (i = 0; i < array_size; i++) { + data_f64[i] = i + .2; + } + + nns_orc_div_c_f64 (data_f64, 5.5, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_DOUBLE_EQ (data_f64[i], (i + .2) / 5.5); + } +} + +/** * @brief Test for tensor_transform orc functions (convert s8 to other type) */ TEST (test_tensor_transform, orc_conv_s8) @@ -871,6 +926,15 @@ TEST (test_tensor_transform, orc_conv_s8) data_s8[i] = (i + 1) * -1; } + /* convert s8 */ + int8_t res_s8[array_size] = { 0, }; + + nns_orc_conv_s8_to_s8 (res_s8, data_s8, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_EQ (res_s8[i], (int8_t) data_s8[i]); + } + /* convert u8 */ uint8_t res_u8[array_size] = { 0, }; @@ -958,6 +1022,15 @@ TEST (test_tensor_transform, orc_conv_u8) EXPECT_EQ (res_s8[i], (int8_t) data_u8[i]); } + /* convert u8 */ + uint8_t res_u8[array_size] = { 0, }; + + nns_orc_conv_u8_to_u8 (res_u8, data_u8, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_EQ (res_u8[i], (uint8_t) data_u8[i]); + } + /* convert s16 */ int16_t res_s16[array_size] = { 0, }; @@ -1045,6 +1118,15 @@ TEST (test_tensor_transform, orc_conv_s16) EXPECT_EQ (res_u8[i], (uint8_t) data_s16[i]); } + /* convert s16 */ + int16_t res_s16[array_size] = { 0, }; + + nns_orc_conv_s16_to_s16 (res_s16, data_s16, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_EQ (res_s16[i], (int16_t) data_s16[i]); + } + /* convert u16 */ uint16_t res_u16[array_size] = { 0, }; @@ -1132,6 +1214,15 @@ TEST (test_tensor_transform, orc_conv_u16) EXPECT_EQ (res_s16[i], (int16_t) data_u16[i]); } + /* convert u16 */ + uint16_t res_u16[array_size] = { 0, }; + + nns_orc_conv_u16_to_u16 (res_u16, data_u16, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_EQ (res_u16[i], (uint16_t) data_u16[i]); + } + /* convert s32 */ int32_t res_s32[array_size] = { 0, }; @@ -1219,6 +1310,15 @@ TEST (test_tensor_transform, orc_conv_s32) EXPECT_EQ (res_u16[i], (uint16_t) data_s32[i]); } + /* convert s32 */ + int32_t res_s32[array_size] = { 0, }; + + nns_orc_conv_s32_to_s32 (res_s32, data_s32, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_EQ (res_s32[i], (int32_t) data_s32[i]); + } + /* convert u32 */ uint32_t res_u32[array_size] = { 0, }; @@ -1306,6 +1406,15 @@ TEST (test_tensor_transform, orc_conv_u32) EXPECT_EQ (res_s32[i], (int32_t) data_u32[i]); } + /* convert u32 */ + uint32_t res_u32[array_size] = { 0, }; + + nns_orc_conv_u32_to_u32 (res_u32, data_u32, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_EQ (res_u32[i], (uint32_t) data_u32[i]); + } + /* convert f32 */ float res_f32[array_size] = { 0, }; @@ -1396,6 +1505,15 @@ TEST (test_tensor_transform, orc_conv_f32) EXPECT_EQ (res_u32[i], (uint32_t) val); } + /* convert f32 */ + float res_f32[array_size] = { 0, }; + + nns_orc_conv_f32_to_f32 (res_f32, data_f32, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_FLOAT_EQ (res_f32[i], (float) data_f32[i]); + } + /* convert f64 */ double res_f64[array_size] = { 0, }; @@ -1485,12 +1603,21 @@ TEST (test_tensor_transform, orc_conv_f64) for (i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ (res_f32[i], (float) data_f64[i]); } + + /* convert f64 */ + double res_f64[array_size] = { 0, }; + + nns_orc_conv_f64_to_f64 (res_f64, data_f64, array_size); + + for (i = 0; i < array_size; i++) { + EXPECT_DOUBLE_EQ (res_f64[i], (double) data_f64[i]); + } } /** * @brief Test for tensor_transform orc functions (performance) */ -TEST (test_tensor_transform, orc_performance_u8) +TEST (test_tensor_transform, orc_performance) { const guint array_size = 80000; guint i; @@ -1500,7 +1627,7 @@ TEST (test_tensor_transform, orc_performance_u8) uint8_t *data_u8 = (uint8_t *) g_malloc0 (sizeof (uint8_t) * array_size); float *data_float = (float *) g_malloc0 (sizeof (float) * array_size); - /* orc add */ + /* orc add u8 */ start_ts = g_get_real_time (); nns_orc_add_c_u8 (data_u8, 2, array_size); stop_ts = g_get_real_time (); @@ -1522,7 +1649,7 @@ TEST (test_tensor_transform, orc_performance_u8) diff_loop = stop_ts - start_ts; _print_log ("add u8 loop: %" G_GINT64_FORMAT, diff_loop); - /* orc mul */ + /* orc mul u8 */ start_ts = g_get_real_time (); nns_orc_mul_c_u8 (data_u8, 2, array_size); stop_ts = g_get_real_time (); @@ -1566,7 +1693,73 @@ TEST (test_tensor_transform, orc_performance_u8) diff_loop = stop_ts - start_ts; _print_log ("conv u8 loop: %" G_GINT64_FORMAT, diff_loop); - /* init data */ + /* orc div f32 */ + start_ts = g_get_real_time (); + nns_orc_div_c_f32 (data_float, 2., array_size); + stop_ts = g_get_real_time (); + + diff_orc = stop_ts - start_ts; + _print_log ("div f32 orc: %" G_GINT64_FORMAT, diff_orc); + + for (i = 0; i < array_size; ++i) { + EXPECT_FLOAT_EQ (data_float[i], 8.); + } + + /* loop */ + start_ts = g_get_real_time (); + for (i = 0; i < array_size; ++i) { + data_float[i] /= 2.; + } + stop_ts = g_get_real_time (); + + diff_loop = stop_ts - start_ts; + _print_log ("div f32 loop: %" G_GINT64_FORMAT, diff_loop); + + /* orc mul f32 */ + start_ts = g_get_real_time (); + nns_orc_mul_c_f32 (data_float, 2., array_size); + stop_ts = g_get_real_time (); + + diff_orc = stop_ts - start_ts; + _print_log ("mul f32 orc: %" G_GINT64_FORMAT, diff_orc); + + for (i = 0; i < array_size; ++i) { + EXPECT_FLOAT_EQ (data_float[i], 8.); + } + + /* loop */ + start_ts = g_get_real_time (); + for (i = 0; i < array_size; ++i) { + data_float[i] *= 2.; + } + stop_ts = g_get_real_time (); + + diff_loop = stop_ts - start_ts; + _print_log ("mul f32 loop: %" G_GINT64_FORMAT, diff_loop); + + /* orc add f32 */ + start_ts = g_get_real_time (); + nns_orc_add_c_f32 (data_float, 2., array_size); + stop_ts = g_get_real_time (); + + diff_orc = stop_ts - start_ts; + _print_log ("add f32 orc: %" G_GINT64_FORMAT, diff_orc); + + for (i = 0; i < array_size; ++i) { + EXPECT_FLOAT_EQ (data_float[i], 18.); + } + + /* loop */ + start_ts = g_get_real_time (); + for (i = 0; i < array_size; ++i) { + data_float[i] += 2.; + } + stop_ts = g_get_real_time (); + + diff_loop = stop_ts - start_ts; + _print_log ("add f32 loop: %" G_GINT64_FORMAT, diff_loop); + + /* init data for tc combined */ for (i = 0; i < array_size; ++i) { data_u8[i] = 1; } @@ -1579,7 +1772,7 @@ TEST (test_tensor_transform, orc_performance_u8) stop_ts = g_get_real_time (); diff_orc = stop_ts - start_ts; - _print_log ("combined u8 orc: %" G_GINT64_FORMAT, diff_orc); + _print_log ("combined orc: %" G_GINT64_FORMAT, diff_orc); for (i = 0; i < array_size; ++i) { EXPECT_FLOAT_EQ (data_float[i], (1 + .2) * 1.2); @@ -1595,7 +1788,7 @@ TEST (test_tensor_transform, orc_performance_u8) stop_ts = g_get_real_time (); diff_loop = stop_ts - start_ts; - _print_log ("combined u8 loop: %" G_GINT64_FORMAT, diff_loop); + _print_log ("combined loop: %" G_GINT64_FORMAT, diff_loop); g_free (data_u8); g_free (data_float);