From f2550e0c445792f791cf8a752b1e4046f505cefc Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Sat, 15 Sep 2018 00:45:31 +0000
Subject: [PATCH] [WebAssembly] SIMD shifts

Summary:
Implement shifts of vectors by i32. Since LLVM defines shifts as binary
operations between two vectors, this involves pattern matching on
splatted shift operands. For v2i64 shifts, any i32 shift operands have
to be zero extended in the input and any i64 shift operands have to be
wrapped in the output.

Depends on D52007.

Reviewers: aheejin, dschuff

Subscribers: sbc100, jgravelle-google, sunfish, llvm-commits

Differential Revision: https://reviews.llvm.org/D51906

llvm-svn: 342302
---
 .../lib/Target/WebAssembly/WebAssemblyInstrSIMD.td |  26 ++
 llvm/test/CodeGen/WebAssembly/simd-arith.ll        | 265 +++++++++++++++++++++
 llvm/test/MC/WebAssembly/simd-encodings.s          |  36 +++
 3 files changed, 327 insertions(+)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 12f836e..5133318 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -128,6 +128,23 @@ multiclass SIMDBinaryFP baseInst> {
   defm "" : SIMDBinary;
   defm "" : SIMDBinary;
 }
+multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, dag shift_vec,
+                     string name, bits<32> simdop> {
+  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
+                        (outs), (ins),
+                        [(set (vec_t V128:$dst),
+                          (node V128:$vec, (vec_t shift_vec)))],
+                        vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
+}
+multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst, int skip> {
+  defm "" : SIMDShift<v16i8, "i8x16", node, (splat16 I32:$x), name, baseInst>;
+  defm "" : SIMDShift<v8i16, "i16x8", node, (splat8 I32:$x), name,
+                      !add(baseInst, !add(1, skip))>;
+  defm "" : SIMDShift<v4i32, "i32x4", node, (splat4 I32:$x), name,
+                      !add(baseInst, !add(2, !add(skip, skip)))>;
+  defm "" : SIMDShift<v2i64, "i64x2", node, (splat2 (i64 (zext I32:$x))),
+                      name, !add(baseInst, !add(3, !add(skip, !add(skip, skip))))>;
+}
 multiclass SIMDBitwise simdop> {
   defm "" : SIMDBinary;
   defm "" : SIMDBinary;
@@ -311,6 +328,10 @@ defm "" : SIMDNegInt;
 defm "" : SIMDNegFP;
 defm "" : SIMDNegFP;

+defm SHL : SIMDShiftInt<shl, "shl", 47, 0>;
+defm SHR_S : SIMDShiftInt<sra, "shr_s", 51, 1>;
+defm SHR_U : SIMDShiftInt<srl, "shr_u", 52, 1>;
+
 let isCommutable = 1 in {
 defm AND : SIMDBitwise;
 defm OR : SIMDBitwise;
@@ -397,6 +418,11 @@ foreach t2 = !foldl(
 ) in
 def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;

+// Truncate i64 shift operands to i32s
+foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in
+def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))),
+          (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>;
+
 // Shuffles after custom lowering
 def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
 def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index bc7f878..a2f4cf0 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -59,6 +59,65 @@ define <16 x i8> @neg_v16i8(<16 x i8> %x) {
   ret <16 x i8> %a
 }

+; CHECK-LABEL: shl_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i8x16.shl $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
+  %t = insertelement <16 x i8> undef, i8 %x, i32 0
+  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
+    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
+                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = shl <16 x i8> %v, %s
+  ret <16 x i8> %a
+}
+
+; CHECK-LABEL: shl_const_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
+; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
+  %a = shl <16 x i8> %v,
+    <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
+     i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  ret <16 x i8> %a
+}
+
+; CHECK-LABEL: shr_s_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i8x16.shr_s $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
+  %t = insertelement <16 x i8> undef, i8 %x, i32 0
+  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
+    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
+                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = ashr <16 x i8> %v, %s
+  ret <16 x i8> %a
+}
+
+; CHECK-LABEL: shr_u_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i8x16.shr_u $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
+  %t = insertelement <16 x i8> undef, i8 %x, i32 0
+  %s = shufflevector <16 x i8> %t, <16 x i8> undef,
+    <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
+                i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = lshr <16 x i8> %v, %s
+  ret <16 x i8> %a
+}
+
 ; CHECK-LABEL: and_v16i8:
 ; NO-SIMD128-NOT: v128
 ; SIMD128-NEXT: .param v128, v128{{$}}
@@ -154,6 +213,61 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) {
   ret <8 x i16> %a
 }

+; CHECK-LABEL: shl_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i16x8.shl $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
+  %t = insertelement <8 x i16> undef, i16 %x, i32 0
+  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
+    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = shl <8 x i16> %v, %s
+  ret <8 x i16> %a
+}
+
+; CHECK-LABEL: shl_const_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
+; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
+  %a = shl <8 x i16> %v,
+    <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  ret <8 x i16> %a
+}
+
+; CHECK-LABEL: shr_s_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i16x8.shr_s $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
+  %t = insertelement <8 x i16> undef, i16 %x, i32 0
+  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
+    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = ashr <8 x i16> %v, %s
+  ret <8 x i16> %a
+}
+
+; CHECK-LABEL: shr_u_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i16x8.shr_u $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
+  %t = insertelement <8 x i16> undef, i16 %x, i32 0
+  %s = shufflevector <8 x i16> %t, <8 x i16> undef,
+    <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %a = lshr <8 x i16> %v, %s
+  ret <8 x i16> %a
+}
+
 ; CHECK-LABEL: and_v8i16:
 ; NO-SIMD128-NOT: v128
 ; SIMD128-NEXT: .param v128, v128{{$}}
@@ -246,6 +360,60 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) {
   ret <4 x i32> %a
 }

+; CHECK-LABEL: shl_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32x4.shl $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
+  %t = insertelement <4 x i32> undef, i32 %x, i32 0
+  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
+    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  %a = shl <4 x i32> %v, %s
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: shl_const_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
+; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
+  %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: shr_s_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32x4.shr_s $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
+  %t = insertelement <4 x i32> undef, i32 %x, i32 0
+  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
+    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  %a = ashr <4 x i32> %v, %s
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: shr_u_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32x4.shr_u $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
+  %t = insertelement <4 x i32> undef, i32 %x, i32 0
+  %s = shufflevector <4 x i32> %t, <4 x i32> undef,
+    <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  %a = lshr <4 x i32> %v, %s
+  ret <4 x i32> %a
+}
+
 ; CHECK-LABEL: and_v4i32:
 ; NO-SIMD128-NOT: v128
 ; SIMD128-NEXT: .param v128, v128{{$}}
@@ -340,6 +508,103 @@ define <2 x i64> @neg_v2i64(<2 x i64> %x) {
   ret <2 x i64> %a
 }

+; CHECK-LABEL: shl_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i64x2.shl $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) {
+  %x2 = zext i32 %x to i64
+  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = shl <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shl_nozext_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i64{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
+; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @shl_nozext_v2i64(<2 x i64> %v, i64 %x) {
+  %t = insertelement <2 x i64> undef, i64 %x, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = shl <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shl_const_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i64.const $push[[L0:[0-9]+]]=, 5{{$}}
+; SIMD128-NEXT: i32.wrap/i64 $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
+; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @shl_const_v2i64(<2 x i64> %v) {
+  %a = shl <2 x i64> %v, <i64 5, i64 5>
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shr_s_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i64x2.shr_s $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) {
+  %x2 = zext i32 %x to i64
+  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = ashr <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shr_s_nozext_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i64{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
+; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @shr_s_nozext_v2i64(<2 x i64> %v, i64 %x) {
+  %t = insertelement <2 x i64> undef, i64 %x, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = ashr <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shr_u_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i32{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i64x2.shr_u $push0=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop0{{$}}
+define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) {
+  %x2 = zext i32 %x to i64
+  %t = insertelement <2 x i64> undef, i64 %x2, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = lshr <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: shr_u_nozext_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .param v128, i64{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
+; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @shr_u_nozext_v2i64(<2 x i64> %v, i64 %x) {
+  %t = insertelement <2 x i64> undef, i64 %x, i32 0
+  %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %a = lshr <2 x i64> %v, %s
+  ret <2 x i64> %a
+}
+
 ; CHECK-LABEL: and_v2i64:
 ; NO-SIMD128-NOT: v128
 ; SIMD128-VM-NOT: v128
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 469b93e..7490a59 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -145,6 +145,42 @@
     # CHECK: i64x2.neg # encoding: [0xfd,0x26]
     i64x2.neg

+    # CHECK: i8x16.shl # encoding: [0xfd,0x2f]
+    i8x16.shl
+
+    # CHECK: i16x8.shl # encoding: [0xfd,0x30]
+    i16x8.shl
+
+    # CHECK: i32x4.shl # encoding: [0xfd,0x31]
+    i32x4.shl
+
+    # CHECK: i64x2.shl # encoding: [0xfd,0x32]
+    i64x2.shl
+
+    # CHECK: i8x16.shr_s # encoding: [0xfd,0x33]
+    i8x16.shr_s
+
+    # CHECK: i8x16.shr_u # encoding: [0xfd,0x34]
+    i8x16.shr_u
+
+    # CHECK: i16x8.shr_s # encoding: [0xfd,0x35]
+    i16x8.shr_s
+
+    # CHECK: i16x8.shr_u # encoding: [0xfd,0x36]
+    i16x8.shr_u
+
+    # CHECK: i32x4.shr_s # encoding: [0xfd,0x37]
+    i32x4.shr_s
+
+    # CHECK: i32x4.shr_u # encoding: [0xfd,0x38]
+    i32x4.shr_u
+
+    # CHECK: i64x2.shr_s # encoding: [0xfd,0x39]
+    i64x2.shr_s
+
+    # CHECK: i64x2.shr_u # encoding: [0xfd,0x3a]
+    i64x2.shr_u
+
     # CHECK: v128.and # encoding: [0xfd,0x3b]
     v128.and
-- 
2.7.4
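Note (illustrative, not part of the committed patch): the "splatted shift operands" the new TableGen patterns match are the usual insertelement/shufflevector splat idiom shown in the tests above. A minimal sketch of the IR shape that should now select to a single i32x4.shl, with a hypothetical function name:

  define <4 x i32> @splat_shift_example(<4 x i32> %v, i32 %x) {
    ; build a <4 x i32> splat of the scalar shift amount %x
    %t = insertelement <4 x i32> undef, i32 %x, i32 0
    %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> zeroinitializer
    ; vector shift whose right operand is the splat; the SIMDShiftInt
    ; patterns should select this to i32x4.shl with %x as the scalar operand
    %a = shl <4 x i32> %v, %s
    ret <4 x i32> %a
  }

For v2i64 the splatted amount is an i64, so a 32-bit amount reaches the DAG as a zero extension (matched directly by the v2i64 pattern), while a genuine i64 amount is handled by the extra Pat above, which narrows it with i32.wrap/i64 in the output.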