[Mono] Intrinsify Narrow for for Vector{64, 128} on Arm64 (#66409)
authorFan Yang <52458914+fanyang-mono@users.noreply.github.com>
Thu, 10 Mar 2022 22:00:05 +0000 (17:00 -0500)
committerGitHub <noreply@github.com>
Thu, 10 Mar 2022 22:00:05 +0000 (17:00 -0500)
* Add arm64 intrinsics for Narrow

* Fix integer types

* Fix more issues with integer types

* Add some comments

* Nit

* Change var names

* Fix format

* Fix format again

src/coreclr/jit/gentree.cpp
src/mono/mono/mini/simd-intrinsics.c

index 640ee36..64873f8 100644 (file)
@@ -20835,7 +20835,8 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types   type,
     else if (varTypeIsFloating(simdBaseType))
     {
         // var tmp1 = op1.ToVector128Unsafe();
-        // return AdvSimd.Arm64.ConvertToSingleLower(tmp1);
+        // var tmp2 = AdvSimd.InsertScalar(tmp1, op2);
+        // return AdvSimd.Arm64.ConvertToSingleLower(tmp2);
 
         CorInfoType tmp2BaseJitType = CORINFO_TYPE_DOUBLE;
 
@@ -20850,8 +20851,9 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types   type,
     else
     {
         // var tmp1 = op1.ToVector128Unsafe();
-        // var tmp2 = AdvSimd.InsertScalar(tmp1.AsUInt64(), 1, op2.AsUInt64());
-        // return AdvSimd.ExtractNarrowingUpper(tmp2).As<T>();
+        // var tmp2 = AdvSimd.InsertScalar(tmp1.AsUInt64(), 1, op2.AsUInt64()).As<T>(); - signed integer use int64,
+        // unsigned integer use uint64
+        // return AdvSimd.ExtractNarrowingLower(tmp2);
 
         CorInfoType tmp2BaseJitType = varTypeIsSigned(simdBaseType) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
 
index b4e4d42..668dc78 100644 (file)
@@ -778,6 +778,28 @@ type_to_extract_op (MonoTypeEnum type)
        }
 }
 
+static MonoClass *
+create_class_instance (MonoCompile *cfg, MonoMethodSignature *fsig, const char* name_space, const char *name, gboolean same_type, MonoType *param_type)
+{
+       MonoClass *ivector = mono_class_load_from_name (mono_defaults.corlib, name_space, name);
+       if (same_type) {
+               MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
+               param_type = mono_class_get_generic_class (arg_class)->context.class_inst->type_argv [0];
+       } else {
+               g_assert(param_type != NULL);
+       }
+
+       MonoType *args [ ] = { param_type };
+       MonoGenericContext ctx;
+       memset (&ctx, 0, sizeof (ctx));
+       ctx.class_inst = mono_metadata_get_generic_inst (1, args);
+       ERROR_DECL (error);
+       MonoClass *ivector_inst = mono_class_inflate_generic_class_checked (ivector, &ctx, error);
+       mono_error_assert_ok (error); /* FIXME don't swallow the error */
+
+       return ivector_inst;
+}
+
 static guint16 sri_vector_methods [] = {
        SN_Abs,
        SN_Add,
@@ -831,6 +853,7 @@ static guint16 sri_vector_methods [] = {
        SN_Max,
        SN_Min,
        SN_Multiply,
+       SN_Narrow,
        SN_Negate,
        SN_OnesComplement,
        SN_Sqrt,
@@ -1098,6 +1121,103 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
                        g_assert_not_reached ();
                }
        }
+       case SN_Narrow: {
+#ifdef TARGET_ARM64
+               if (!is_element_type_primitive (fsig->params [0]))
+                       return NULL;
+
+               MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
+               int size = mono_class_value_size (arg_class, NULL);
+
+               if (size == 16) {
+                       switch (arg0_type) {
+                       case MONO_TYPE_R8: {
+                               MonoInst *ins = emit_simd_ins (cfg, arg_class, OP_ARM64_FCVTN, args [0]->dreg, -1);
+                               return emit_simd_ins (cfg, arg_class, OP_ARM64_FCVTN2, ins->dreg, args [1]->dreg);
+                       }
+                       case MONO_TYPE_I2:
+                       case MONO_TYPE_I4:
+                       case MONO_TYPE_I8:
+                       case MONO_TYPE_U2:
+                       case MONO_TYPE_U4:
+                       case MONO_TYPE_U8: {
+                               MonoInst *ins = emit_simd_ins (cfg, arg_class, OP_ARM64_XTN, args [0]->dreg, -1);
+                               return emit_simd_ins (cfg, arg_class, OP_ARM64_XTN2, ins->dreg, args [1]->dreg);
+                       }
+                       default:
+                               return NULL;
+                       }
+               } else {
+                       switch (arg0_type) {
+                       case MONO_TYPE_R8: {
+                               //Widen arg0
+                               MonoInst *ins1 = emit_simd_ins (cfg, arg_class, OP_XWIDEN_UNSAFE, args [0]->dreg, -1);
+
+                               //Insert arg1 to arg0
+                               int tmp = alloc_ireg (cfg);
+                               MONO_EMIT_NEW_ICONST (cfg, tmp, 1);
+                               MonoInst *ins2 = emit_simd_ins (cfg, arg_class, OP_EXTRACT_R8, args [1]->dreg, -1);
+                               ins2->inst_c0 = 0;
+                               ins2->inst_c1 = arg0_type;
+
+                               MonoClass *ivector128_inst = create_class_instance (cfg, fsig, "System.Runtime.Intrinsics", "Vector128`1", TRUE, NULL);
+
+                               ins1 = emit_simd_ins (cfg, ivector128_inst, OP_XINSERT_R8, ins1->dreg, ins2->dreg);
+                               ins1->sreg3 = tmp;
+                               ins1->inst_c1 = arg0_type;
+
+                               //ConvertToSingleLower
+                               return emit_simd_ins (cfg, arg_class, OP_ARM64_FCVTN, ins1->dreg, -1);
+                       }
+                       case MONO_TYPE_I2:
+                       case MONO_TYPE_I4:
+                       case MONO_TYPE_I8:
+                       case MONO_TYPE_U2:
+                       case MONO_TYPE_U4:
+                       case MONO_TYPE_U8: {
+                               //Widen arg0
+                               MonoInst *arg0 = emit_simd_ins (cfg, arg_class, OP_XWIDEN_UNSAFE, args [0]->dreg, -1);
+
+                               //Cast arg0 and arg1 to u/int64
+                               MonoType *type_new;
+                               MonoTypeEnum type_enum_new;
+                               if (type_enum_is_unsigned (arg0_type)) {
+                                       type_new = m_class_get_byval_arg (mono_defaults.uint64_class);
+                                       type_enum_new = MONO_TYPE_U8;
+                               } else {
+                                       type_new = m_class_get_byval_arg (mono_defaults.int64_class);
+                                       type_enum_new = MONO_TYPE_I8;
+                               }
+                               MonoClass *ivector128_64_inst = create_class_instance (cfg, fsig, "System.Runtime.Intrinsics", "Vector128`1", FALSE, type_new);
+                               arg0 = emit_simd_ins (cfg, ivector128_64_inst, OP_XCAST, arg0->dreg, -1);
+                               MonoClass *ivector64_64_inst = create_class_instance (cfg, fsig, "System.Runtime.Intrinsics", "Vector64`1", FALSE, type_new);
+                               MonoInst *arg1 = emit_simd_ins (cfg, ivector64_64_inst, OP_XCAST, args [1]->dreg, -1);
+
+                               //Insert arg1 to arg0
+                               int tmp = alloc_ireg (cfg);
+                               MONO_EMIT_NEW_ICONST (cfg, tmp, 1);
+                               arg1 = emit_simd_ins (cfg, ivector64_64_inst, OP_EXTRACT_I8, arg1->dreg, -1);
+                               arg1->inst_c0 = 0;
+                               arg1->inst_c1 = type_enum_new;
+                               MonoClass *ivector128_inst = create_class_instance (cfg, fsig, "System.Runtime.Intrinsics", "Vector128`1", TRUE, NULL);
+                               MonoInst *ins = emit_simd_ins (cfg, ivector128_64_inst, OP_XINSERT_I8, arg0->dreg, arg1->dreg);
+                               ins->sreg3 = tmp;
+                               ins->inst_c1 = type_enum_new;
+
+                               //Cast arg0 back to its original element type (arg0_type)
+                               ins = emit_simd_ins (cfg, ivector128_inst, OP_XCAST, ins->dreg, -1);
+
+                               //ExtractNarrowingLower
+                               return emit_simd_ins (cfg, ivector128_inst, OP_ARM64_XTN, ins->dreg, -1);
+                       }
+                       default:
+                               return NULL;
+                       }
+               }
+#else
+               return NULL;
+#endif
+       }
        case SN_Negate:
        case SN_OnesComplement: {
                if (!is_element_type_primitive (fsig->params [0]))
@@ -2243,27 +2363,8 @@ emit_arm64_intrinsics (
                case SN_InsertScalar:
                case SN_Insert: {
                        MonoClass *ret_klass = mono_class_from_mono_type_internal (fsig->ret);
-                       int insert_op = 0;
-                       int extract_op = 0;
-                       switch (arg0_type) {
-                       case MONO_TYPE_I1: case MONO_TYPE_U1: insert_op = OP_XINSERT_I1; extract_op = OP_EXTRACT_I1; break;
-                       case MONO_TYPE_I2: case MONO_TYPE_U2: insert_op = OP_XINSERT_I2; extract_op = OP_EXTRACT_I2; break;
-                       case MONO_TYPE_I4: case MONO_TYPE_U4: insert_op = OP_XINSERT_I4; extract_op = OP_EXTRACT_I4; break;
-                       case MONO_TYPE_I8: case MONO_TYPE_U8: insert_op = OP_XINSERT_I8; extract_op = OP_EXTRACT_I8; break;
-                       case MONO_TYPE_R4: insert_op = OP_XINSERT_R4; extract_op = OP_EXTRACT_R4; break;
-                       case MONO_TYPE_R8: insert_op = OP_XINSERT_R8; extract_op = OP_EXTRACT_R8; break;
-                       case MONO_TYPE_I:
-                       case MONO_TYPE_U:
-#if TARGET_SIZEOF_VOID_P == 8
-                               insert_op = OP_XINSERT_I8;
-                               extract_op = OP_EXTRACT_I8;
-#else
-                               insert_op = OP_XINSERT_I4;
-                               extract_op = OP_EXTRACT_I4;
-#endif
-                               break;
-                       default: g_assert_not_reached ();
-                       }
+                       int insert_op = type_to_xinsert_op (arg0_type);
+                       int extract_op = type_to_extract_op (arg0_type);
                        int val_src_reg = args [2]->dreg;
                        switch (id) {
                        case SN_InsertSelectedScalar: {