Fix vector's Narrow intrinsics (#81843)
author: Radek Doulik <radek.doulik@gmail.com>
Thu, 9 Feb 2023 12:40:40 +0000 (13:40 +0100)
committer: GitHub <noreply@github.com>
Thu, 9 Feb 2023 12:40:40 +0000 (13:40 +0100)
The BCL Vector classes have non-saturating Narrow methods, while the wasm narrow instructions are saturating. As far as I know, wasm does not have non-saturating narrow instructions. So instead of

    i8x16.narrow_i16x8_s
    i8x16.narrow_i16x8_u
    i16x8.narrow_i32x4_s
    i16x8.narrow_i32x4_u

use the `v8x16.shuffle` instruction to implement the extract narrow operation.

This fixes the `System.Numerics.Tests.GenericVectorTests.Narrow[U]Int*` tests.

src/mono/mono/mini/mini-llvm.c
src/mono/mono/mini/mini-ops.h
src/mono/mono/mini/simd-intrinsics.c

index 6c4c1bf..65f516d 100644 (file)
@@ -9833,6 +9833,37 @@ MONO_RESTORE_WARNING
                        values [ins->dreg] = LLVMBuildShuffleVector (builder, lhs, LLVMGetUndef (LLVMTypeOf (lhs)), shuffle_val, "");
                        break;
                }
+               case OP_WASM_EXTRACT_NARROW: {
+                       int nelems = LLVMGetVectorSize (LLVMTypeOf (lhs));
+                       int bytes = 16 / (nelems * 2);
+                       LLVMTypeRef itype;
+
+                       switch(nelems) {
+                               case 2:
+                                       itype = i4_t;
+                                       break;
+                               case 4:
+                                       itype = i2_t;
+                                       break;
+                               case 8:
+                                       itype = i1_t;
+                                       break;
+                               default:
+                                       g_assert_not_reached();
+                       }
+
+                       LLVMValueRef mask = LLVMConstNull (LLVMVectorType (i1_t, 16));
+                       for (int i = 0; i < nelems; ++i) {
+                               for (int j = 0; j < bytes; ++j) {
+                                       mask = LLVMBuildInsertElement (builder, mask, const_int8 (i * bytes * 2 + j), const_int32 (i * bytes + j), "");
+                                       mask = LLVMBuildInsertElement (builder, mask, const_int8 (16 + i * bytes * 2 + j), const_int32 (8 + i * bytes + j), "");
+                               }
+                       }
+
+                       LLVMValueRef shuffle = LLVMBuildShuffleVector (builder, LLVMBuildBitCast (builder, lhs, LLVMVectorType (i1_t, 16), ""), LLVMBuildBitCast (builder, rhs, LLVMVectorType (i1_t, 16), ""), mask, "");
+                       values [ins->dreg] = LLVMBuildBitCast (builder, shuffle, LLVMVectorType (itype, nelems * 2), "");
+                       break;
+               }
 #endif
 #if defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_WASM)
                case OP_XEQUAL: {
index 7db9500..9f32834 100644 (file)
@@ -853,6 +853,7 @@ MINI_OP(OP_WASM_SIMD_BITMASK, "wasm_bitmask", IREG, XREG, NONE)
 MINI_OP3(OP_WASM_SIMD_SHUFFLE, "wasm_shuffle", XREG, XREG, XREG, XREG)
 MINI_OP(OP_WASM_SIMD_SUM, "wasm_sum", XREG, XREG, NONE)
 MINI_OP(OP_WASM_SIMD_SWIZZLE, "wasm_swizzle", XREG, XREG, XREG)
+MINI_OP(OP_WASM_EXTRACT_NARROW, "wasm_extract_narrow", XREG, XREG, XREG)
 #endif
 
 #if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_WASM)
index cb79df1..31ce239 100644 (file)
@@ -1621,25 +1621,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
                if (size != 16)
                        return NULL;
 
-               int intrins = -1;
                switch (arg0_type) {
                case MONO_TYPE_I2:
-                       intrins = INTRINS_WASM_NARROW_SIGNED_V16;
-                       break;
                case MONO_TYPE_I4:
-                       intrins = INTRINS_WASM_NARROW_SIGNED_V8;
-                       break;
+               case MONO_TYPE_I8:
                case MONO_TYPE_U2:
-                       intrins = INTRINS_WASM_NARROW_UNSIGNED_V16;
-                       break;
                case MONO_TYPE_U4:
-                       intrins = INTRINS_WASM_NARROW_UNSIGNED_V8;
-                       break;
+               case MONO_TYPE_U8:
+                       return emit_simd_ins_for_sig (cfg, klass, OP_WASM_EXTRACT_NARROW, -1, -1, fsig, args);
                }
 
-               if (intrins != -1)
-                       return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, intrins, arg0_type, fsig, args);
-
                return NULL;
 #else
                return NULL;