From da07942834fe3ea575d7f7b980940d938411afe5 Mon Sep 17 00:00:00 2001 From: Zhi An Ng Date: Fri, 15 Oct 2021 17:45:08 -0700 Subject: [PATCH] [WebAssembly] Add prototype relaxed laneselect instructions Add i8x16, i16x8, i32x4, i64x2 laneselect instructions. These are only exposed as builtins, and require user opt-in. --- clang/include/clang/Basic/BuiltinsWebAssembly.def | 5 +++ clang/lib/CodeGen/CGBuiltin.cpp | 11 +++++ clang/test/CodeGen/builtins-wasm.c | 28 +++++++++++++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 5 +++ .../lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 17 ++++++++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll | 48 ++++++++++++++++++++++ llvm/test/MC/WebAssembly/simd-encodings.s | 12 ++++++ 7 files changed, 126 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 7784246..557189c 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -167,5 +167,10 @@ TARGET_BUILTIN(__builtin_wasm_fms_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd") TARGET_BUILTIN(__builtin_wasm_fma_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd") TARGET_BUILTIN(__builtin_wasm_fms_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd") +TARGET_BUILTIN(__builtin_wasm_laneselect_i8x16, "V16ScV16ScV16ScV16Sc", "nc", "relaxed-simd") +TARGET_BUILTIN(__builtin_wasm_laneselect_i16x8, "V8sV8sV8sV8s", "nc", "relaxed-simd") +TARGET_BUILTIN(__builtin_wasm_laneselect_i32x4, "V4iV4iV4iV4i", "nc", "relaxed-simd") +TARGET_BUILTIN(__builtin_wasm_laneselect_i64x2, "V2LLiV2LLiV2LLiV2LLi", "nc", "relaxed-simd") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9172a21..9a44fdd 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18308,6 +18308,17 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = 
CGM.getIntrinsic(IntNo, A->getType()); return Builder.CreateCall(Callee, {A, B, C}); } + case WebAssembly::BI__builtin_wasm_laneselect_i8x16: + case WebAssembly::BI__builtin_wasm_laneselect_i16x8: + case WebAssembly::BI__builtin_wasm_laneselect_i32x4: + case WebAssembly::BI__builtin_wasm_laneselect_i64x2: { + Value *A = EmitScalarExpr(E->getArg(0)); + Value *B = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_laneselect, A->getType()); + return Builder.CreateCall(Callee, {A, B, C}); + } default: return nullptr; } diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 914c37d..e86812c 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -704,3 +704,31 @@ f64x2 fms_f64x2(f64x2 a, f64x2 b, f64x2 c) { // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c) // WEBASSEMBLY-NEXT: ret } + +i8x16 laneselect_i8x16(i8x16 a, i8x16 b, i8x16 c) { + return __builtin_wasm_laneselect_i8x16(a, b, c); + // WEBASSEMBLY: call <16 x i8> @llvm.wasm.laneselect.v16i8( + // WEBASSEMBLY-SAME: <16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + // WEBASSEMBLY-NEXT: ret +} + +i16x8 laneselect_i16x8(i16x8 a, i16x8 b, i16x8 c) { + return __builtin_wasm_laneselect_i16x8(a, b, c); + // WEBASSEMBLY: call <8 x i16> @llvm.wasm.laneselect.v8i16( + // WEBASSEMBLY-SAME: <8 x i16> %a, <8 x i16> %b, <8 x i16> %c) + // WEBASSEMBLY-NEXT: ret +} + +i32x4 laneselect_i32x4(i32x4 a, i32x4 b, i32x4 c) { + return __builtin_wasm_laneselect_i32x4(a, b, c); + // WEBASSEMBLY: call <4 x i32> @llvm.wasm.laneselect.v4i32( + // WEBASSEMBLY-SAME: <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + // WEBASSEMBLY-NEXT: ret +} + +i64x2 laneselect_i64x2(i64x2 a, i64x2 b, i64x2 c) { + return __builtin_wasm_laneselect_i64x2(a, b, c); + // WEBASSEMBLY: call <2 x i64> @llvm.wasm.laneselect.v2i64( + // WEBASSEMBLY-SAME: <2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + // 
 WEBASSEMBLY-NEXT: ret +} diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index de0b36e..7d0f38b 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -195,6 +195,11 @@ def int_wasm_fms : [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_laneselect : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Thread-local storage intrinsics //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 4448faa..7f54073 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1344,3 +1344,20 @@ multiclass SIMDFM<Vec vec, bits<32> simdopA, bits<32> simdopS> { defm "" : SIMDFM<F32x4, 0xaf, 0xb0>; defm "" : SIMDFM<F64x2, 0xcf, 0xd0>; + +//===----------------------------------------------------------------------===// +// Laneselect +//===----------------------------------------------------------------------===// + +multiclass SIMDLANESELECT<Vec vec, bits<32> op> { + defm LANESELECT_#vec : + RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), (int_wasm_laneselect + (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".laneselect\t$dst, $a, $b, $c", vec.prefix#".laneselect", op>; +} + +defm "" : SIMDLANESELECT<I8x16, 0xb2>; +defm "" : SIMDLANESELECT<I16x8, 0xb3>; +defm "" : SIMDLANESELECT<I32x4, 0xd2>; +defm "" : SIMDLANESELECT<I64x2, 0xd3>; diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 31f9afc..660a107 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -180,6 
+180,18 @@ define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { ret <16 x i8> %res } +; CHECK-LABEL: laneselect_v16i8: +; CHECK-NEXT: .functype laneselect_v16i8 (v128, v128, v128) -> (v128){{$}} +; CHECK-NEXT: i8x16.laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <16 x i8> @llvm.wasm.laneselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +define <16 x i8> @laneselect_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { + %v = call <16 x i8> @llvm.wasm.laneselect.v16i8( + <16 x i8> %a, <16 x i8> %b, <16 x i8> %c + ) + ret <16 x i8> %v +} + ; ============================================================================== ; 8 x i16 ; ============================================================================== @@ -334,6 +346,18 @@ define <8 x i16> @narrow_unsigned_v8i16(<4 x i32> %low, <4 x i32> %high) { ret <8 x i16> %a } +; CHECK-LABEL: laneselect_v8i16: +; CHECK-NEXT: .functype laneselect_v8i16 (v128, v128, v128) -> (v128){{$}} +; CHECK-NEXT: i16x8.laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <8 x i16> @llvm.wasm.laneselect.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) +define <8 x i16> @laneselect_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { + %v = call <8 x i16> @llvm.wasm.laneselect.v8i16( + <8 x i16> %a, <8 x i16> %b, <8 x i16> %c + ) + ret <8 x i16> %v +} + ; ============================================================================== ; 4 x i32 ; ============================================================================== @@ -480,6 +504,18 @@ define <4 x i32> @trunc_sat_zero_u_v4i32_2(<2 x double> %x) { ret <4 x i32> %a } +; CHECK-LABEL: laneselect_v4i32: +; CHECK-NEXT: .functype laneselect_v4i32 (v128, v128, v128) -> (v128){{$}} +; CHECK-NEXT: i32x4.laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <4 x i32> @llvm.wasm.laneselect.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +define <4 x i32> @laneselect_v4i32(<4 x i32> %a, 
<4 x i32> %b, <4 x i32> %c) { + %v = call <4 x i32> @llvm.wasm.laneselect.v4i32( + <4 x i32> %a, <4 x i32> %b, <4 x i32> %c + ) + ret <4 x i32> %v +} + ; ============================================================================== ; 2 x i64 ; ============================================================================== @@ -525,6 +561,18 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c) { ret <2 x i64> %a } +; CHECK-LABEL: laneselect_v2i64: +; CHECK-NEXT: .functype laneselect_v2i64 (v128, v128, v128) -> (v128){{$}} +; CHECK-NEXT: i64x2.laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <2 x i64> @llvm.wasm.laneselect.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +define <2 x i64> @laneselect_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { + %v = call <2 x i64> @llvm.wasm.laneselect.v2i64( + <2 x i64> %a, <2 x i64> %b, <2 x i64> %c + ) + ret <2 x i64> %v +} + ; ============================================================================== ; 4 x f32 ; ============================================================================== diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index fa24bf9..990981c 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -791,4 +791,16 @@ main: # CHECK: f64x2.fms # encoding: [0xfd,0xd0,0x01] f64x2.fms + # CHECK: i8x16.laneselect # encoding: [0xfd,0xb2,0x01] + i8x16.laneselect + + # CHECK: i16x8.laneselect # encoding: [0xfd,0xb3,0x01] + i16x8.laneselect + + # CHECK: i32x4.laneselect # encoding: [0xfd,0xd2,0x01] + i32x4.laneselect + + # CHECK: i64x2.laneselect # encoding: [0xfd,0xd3,0x01] + i64x2.laneselect + end_function -- 2.7.4