From f2550e0c445792f791cf8a752b1e4046f505cefc Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Sat, 15 Sep 2018 00:45:31 +0000
Subject: [PATCH] [WebAssembly] SIMD shifts

Summary:
Implement shifts of vectors by i32. Since LLVM defines shifts as binary
operations between two vectors, this involves pattern matching on
splatted shift operands. For v2i64 shifts, any i32 shift operands have
to be zero extended in the input and any i64 shift operands have to be
wrapped in the output.

Depends on D52007.

Reviewers: aheejin, dschuff

Subscribers: sbc100, jgravelle-google, sunfish, llvm-commits

Differential Revision: https://reviews.llvm.org/D51906

llvm-svn: 342302
---
 .../lib/Target/WebAssembly/WebAssemblyInstrSIMD.td |  26 ++
 llvm/test/CodeGen/WebAssembly/simd-arith.ll        | 265 +++++++++++++++++++++
 llvm/test/MC/WebAssembly/simd-encodings.s          |  36 +++
 3 files changed, 327 insertions(+)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 12f836e..5133318 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -128,6 +128,23 @@ multiclass SIMDBinaryFP baseInst> {
   defm "" : SIMDBinary;
   defm "" : SIMDBinary;
 }
+multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, dag shift_vec,
+                     string name, bits<32> simdop> {
+  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
+                        (outs), (ins),
+                        [(set (vec_t V128:$dst),
+                          (node V128:$vec, (vec_t shift_vec)))],
+                        vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
+}
+multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst, int skip> {
+  defm "" : SIMDShift<v16i8, "i8x16", node, (splat16 I32:$x), name, baseInst>;
+  defm "" : SIMDShift<v8i16, "i16x8", node, (splat8 I32:$x), name,
+                      !add(baseInst, !add(1, skip))>;
+  defm "" : SIMDShift<v4i32, "i32x4", node, (splat4 I32:$x), name,
+                      !add(baseInst, !add(2, !add(skip, skip)))>;
+  defm "" : SIMDShift<v2i64, "i64x2", node, (splat2 (i64 (zext I32:$x))),
+                      name, !add(baseInst, !add(3, !add(skip, !add(skip, skip))))>;
+}
 multiclass SIMDBitwise simdop> {
   defm "" : SIMDBinary;
   defm "" : SIMDBinary;
@@ -311,6 +328,10 @@ defm "" : SIMDNegInt;
 defm "" : SIMDNegFP;
 defm "" : SIMDNegFP;

+defm SHL : SIMDShiftInt<shl, "shl", 47, 0>;
+defm SHR_S : SIMDShiftInt<sra, "shr_s", 51, 1>;
+defm SHR_U : SIMDShiftInt<srl, "shr_u", 52, 1>;
+
 let isCommutable = 1 in {
 defm AND : SIMDBitwise;
 defm OR : SIMDBitwise;
@@ -397,6 +418,11 @@ foreach t2 = !foldl(
 ) in
 def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;

+// Truncate i64 shift operands to i32s
+foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in
+def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))),
+          (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>;
+
 // Shuffles after custom lowering
 def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
 def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index bc7f878..a2f4cf0 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -59,6 +59,65 @@ define <16 x i8> @neg_v16i8(<16 x i8> %x) {
   ret <16 x i8> %a
 }

+; CHECK-LABEL: shl_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i8x16.shl $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
+  %t = insertelement <16 x i8> undef, i8 %x, i32 0
+  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
+    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
+                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = shl <16 x i8> %v, %s
+  ret <16 x i8> %a
+}
+
+; CHECK-LABEL: shl_const_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
+; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
+  %a = shl <16 x i8> %v,
+    <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
+     i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  ret <16 x i8> %a
+}
+
+; CHECK-LABEL: shr_s_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i8x16.shr_s $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
+  %t = insertelement <16 x i8> undef, i8 %x, i32 0
+  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
+    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
+                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = ashr <16 x i8> %v, %s
+  ret <16 x i8> %a
+}
+
+; CHECK-LABEL: shr_u_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i8x16.shr_u $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
+  %t = insertelement <16 x i8> undef, i8 %x, i32 0
+  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
+    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
+                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = lshr <16 x i8> %v, %s
+  ret <16 x i8> %a
+}
+
 ; CHECK-LABEL: and_v16i8:
 ; NO-SIMD128-NOT: v128
 ; SIMD128-NEXT: .param v128, v128{{$}}
@@ -154,6 +213,61 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) {
   ret <8 x i16> %a
 }

+; CHECK-LABEL: shl_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i16x8.shl $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
+  %t = insertelement <8 x i16> undef, i16 %x, i32 0
+  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
+    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = shl <8 x i16> %v, %s
+  ret <8 x i16> %a
+}
+
+; CHECK-LABEL: shl_const_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
+; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
+  %a = shl <8 x i16> %v,
+    <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  ret <8 x i16> %a
+}
+
+; CHECK-LABEL: shr_s_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i16x8.shr_s $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
+  %t = insertelement <8 x i16> undef, i16 %x, i32 0
+  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
+    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = ashr <8 x i16> %v, %s
+  ret <8 x i16> %a
+}
+
+; CHECK-LABEL: shr_u_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i16x8.shr_u $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
+  %t = insertelement <8 x i16> undef, i16 %x, i32 0
+  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
+    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = lshr <8 x i16> %v, %s
+  ret <8 x i16> %a
+}
+
 ; CHECK-LABEL: and_v8i16:
 ; NO-SIMD128-NOT: v128
 ; SIMD128-NEXT: .param v128, v128{{$}}
@@ -246,6 +360,60 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) {
   ret <4 x i32> %a
 }

+; CHECK-LABEL: shl_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32x4.shl $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
+  %t = insertelement <4 x i32> undef, i32 %x, i32 0
+  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
+    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  %a = shl <4 x i32> %v, %s
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: shl_const_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
+; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
+  %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: shr_s_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32x4.shr_s $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
+  %t = insertelement <4 x i32> undef, i32 %x, i32 0
+  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
+    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  %a = ashr <4 x i32> %v, %s
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: shr_u_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32x4.shr_u $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
+  %t = insertelement <4 x i32> undef, i32 %x, i32 0
+  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
+    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  %a = lshr <4 x i32> %v, %s
+  ret <4 x i32> %a
+}
+
 ; CHECK-LABEL: and_v4i32:
 ; NO-SIMD128-NOT: v128
 ; SIMD128-NEXT: .param v128, v128{{$}}
@@ -340,6 +508,103 @@ define <2 x i64> @neg_v2i64(<2 x i64> %x) {
   ret <2 x i64> %a
 }

+; CHECK-LABEL: shl_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i64x2.shl $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) {
+  %x2 = zext i32 %x to i64
+  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = shl <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shl_nozext_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i64{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
+; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @shl_nozext_v2i64(<2 x i64> %v, i64 %x) {
+  %t = insertelement <2 x i64> undef, i64 %x, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = shl <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shl_const_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i64.const $push[[L0:[0-9]+]]=, 5{{$}}
+; SIMD128-NEXT: i32.wrap/i64 $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
+; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @shl_const_v2i64(<2 x i64> %v) {
+  %a = shl <2 x i64> %v, <i64 5, i64 5>
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shr_s_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i64x2.shr_s $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) {
+  %x2 = zext i32 %x to i64
+  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = ashr <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shr_s_nozext_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i64{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
+; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @shr_s_nozext_v2i64(<2 x i64> %v, i64 %x) {
+  %t = insertelement <2 x i64> undef, i64 %x, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = ashr <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shr_u_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i64x2.shr_u $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) {
+  %x2 = zext i32 %x to i64
+  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = lshr <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shr_u_nozext_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i64{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
+; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @shr_u_nozext_v2i64(<2 x i64> %v, i64 %x) {
+  %t = insertelement <2 x i64> undef, i64 %x, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = lshr <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
 ; CHECK-LABEL: and_v2i64:
 ; NO-SIMD128-NOT: v128
 ; SIMD128-VM-NOT: v128
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 469b93e..7490a59 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -145,6 +145,42 @@
     # CHECK: i64x2.neg # encoding: [0xfd,0x26]
     i64x2.neg

+    # CHECK: i8x16.shl # encoding: [0xfd,0x2f]
+    i8x16.shl
+
+    # CHECK: i16x8.shl # encoding: [0xfd,0x30]
+    i16x8.shl
+
+    # CHECK: i32x4.shl # encoding: [0xfd,0x31]
+    i32x4.shl
+
+    # CHECK: i64x2.shl # encoding: [0xfd,0x32]
+    i64x2.shl
+
+    # CHECK: i8x16.shr_s # encoding: [0xfd,0x33]
+    i8x16.shr_s
+
+    # CHECK: i8x16.shr_u # encoding: [0xfd,0x34]
+    i8x16.shr_u
+
+    # CHECK: i16x8.shr_s # encoding: [0xfd,0x35]
+    i16x8.shr_s
+
+    # CHECK: i16x8.shr_u # encoding: [0xfd,0x36]
+    i16x8.shr_u
+
+    # CHECK: i32x4.shr_s # encoding: [0xfd,0x37]
+    i32x4.shr_s
+
+    # CHECK: i32x4.shr_u # encoding: [0xfd,0x38]
+    i32x4.shr_u
+
+    # CHECK: i64x2.shr_s # encoding: [0xfd,0x39]
+    i64x2.shr_s
+
+    # CHECK: i64x2.shr_u # encoding: [0xfd,0x3a]
+    i64x2.shr_u
+
     # CHECK: v128.and # encoding: [0xfd,0x3b]
     v128.and
-- 
2.7.4
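Note (illustrative, not part of the committed patch): the "splatted shift operands" the new TableGen patterns match are the usual insertelement/shufflevector splat idiom shown in the tests above. A minimal sketch of the IR shape that should now select to a single i32x4.shl, with a hypothetical function name:

  define <4 x i32> @splat_shift_example(<4 x i32> %v, i32 %x) {
    ; build a <4 x i32> splat of the scalar shift amount %x
    %t = insertelement <4 x i32> undef, i32 %x, i32 0
    %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> zeroinitializer
    ; vector shift whose right operand is the splat; the SIMDShiftInt
    ; patterns should select this to i32x4.shl with %x as the scalar operand
    %a = shl <4 x i32> %v, %s
    ret <4 x i32> %a
  }

For v2i64 the splatted amount is an i64, so a 32-bit amount reaches the DAG as a zero extension (matched directly by the v2i64 pattern), while a genuine i64 amount is handled by the extra Pat above, which narrows it with i32.wrap/i64 in the output.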