From 82a13d05ab7184a93befe7c5c284b79596cd5fb3 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 16 May 2022 17:51:45 -0700 Subject: [PATCH] [WebAssembly] Update relaxed SIMD opcodes and names to reflect the latest state of the proposal: https://github.com/WebAssembly/relaxed-simd/blob/main/proposals/relaxed-simd/Overview.md#binary-format. Moves code around to match the instruction order from the proposal, but the only functional changes are to the names and opcodes. Reviewed By: aheejin Differential Revision: https://reviews.llvm.org/D125726 --- .../lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 84 ++++++++++++---------- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll | 16 ++--- llvm/test/MC/WebAssembly/simd-encodings.s | 70 +++++++++--------- 3 files changed, 90 insertions(+), 80 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 449a44f..98944fc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1334,7 +1334,37 @@ defm Q15MULR_SAT_S : SIMDBinary; //===----------------------------------------------------------------------===// -// Fused Multiply- Add and Subtract (FMA/FMS) +// Relaxed swizzle +//===----------------------------------------------------------------------===// + +defm RELAXED_SWIZZLE : + RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), + [(set (v16i8 V128:$dst), + (int_wasm_relaxed_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], + "i8x16.relaxed_swizzle\t$dst, $src, $mask", "i8x16.relaxed_swizzle", 0x100>; + +//===----------------------------------------------------------------------===// +// Relaxed floating-point to int conversions +//===----------------------------------------------------------------------===// + +multiclass SIMD_RELAXED_CONVERT simdop> { + defm op#_#vec : + RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), + [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))], + vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>; +} + +defm "" : SIMD_RELAXED_CONVERT; +defm "" : SIMD_RELAXED_CONVERT; +defm "" : SIMD_RELAXED_CONVERT; +defm "" : SIMD_RELAXED_CONVERT; + +//===----------------------------------------------------------------------===// +// Relaxed Fused Multiply- Add and Subtract (FMA/FMS) //===----------------------------------------------------------------------===// multiclass SIMDFM simdopA, bits<32> simdopS> { @@ -1342,16 +1372,18 @@ multiclass SIMDFM simdopA, bits<32> simdopS> { RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec.vt V128:$dst), (int_wasm_fma (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], - vec.prefix#".fma\t$dst, $a, $b, $c", vec.prefix#".fma", simdopA>; + vec.prefix#".relaxed_fma\t$dst, $a, $b, $c", + vec.prefix#".relaxed_fma", simdopA>; defm FMS_#vec : RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec.vt V128:$dst), (int_wasm_fms (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], - vec.prefix#".fms\t$dst, $a, $b, $c", vec.prefix#".fms", simdopS>; + vec.prefix#".relaxed_fms\t$dst, $a, $b, $c", + vec.prefix#".relaxed_fms", simdopS>; } -defm "" : SIMDFM; -defm "" : SIMDFM; +defm "" : SIMDFM; +defm "" : SIMDFM; //===----------------------------------------------------------------------===// // Laneselect @@ -1362,26 +1394,17 @@ multiclass SIMDLANESELECT op> { RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec.vt V128:$dst), (int_wasm_laneselect (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], - vec.prefix#".laneselect\t$dst, $a, $b, $c", vec.prefix#".laneselect", op>; + vec.prefix#".relaxed_laneselect\t$dst, $a, $b, $c", + vec.prefix#".relaxed_laneselect", op>; } -defm "" : SIMDLANESELECT; -defm "" : SIMDLANESELECT; -defm "" : SIMDLANESELECT; -defm "" : SIMDLANESELECT; +defm "" : SIMDLANESELECT; +defm "" : SIMDLANESELECT; +defm "" : SIMDLANESELECT; +defm "" : SIMDLANESELECT; //===----------------------------------------------------------------------===// -// Relaxed swizzle -//===----------------------------------------------------------------------===// - -defm RELAXED_SWIZZLE : - RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), - [(set (v16i8 V128:$dst), - (int_wasm_relaxed_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], - "i8x16.relaxed_swizzle\t$dst, $src, $mask", "i8x16.relaxed_swizzle", 162>; - -//===----------------------------------------------------------------------===// // Relaxed floating-point min and max. //===----------------------------------------------------------------------===// @@ -1398,22 +1421,5 @@ multiclass SIMD_RELAXED_FMINMAX simdopMin, bits<32> simdopMax> vec.prefix#".relaxed_max\t$dst, $a, $b", vec.prefix#".relaxed_max", simdopMax>; } -defm "" : SIMD_RELAXED_FMINMAX; -defm "" : SIMD_RELAXED_FMINMAX; - -//===----------------------------------------------------------------------===// -// Relaxed floating-point to int conversions -//===----------------------------------------------------------------------===// - -multiclass SIMD_RELAXED_CONVERT simdop> { - defm op#_#vec : - RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), - [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))], - vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>; -} - -defm "" : SIMD_RELAXED_CONVERT; -defm "" : SIMD_RELAXED_CONVERT; - -defm "" : SIMD_RELAXED_CONVERT; -defm "" : SIMD_RELAXED_CONVERT; +defm "" : SIMD_RELAXED_FMINMAX; +defm "" : SIMD_RELAXED_FMINMAX; diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 8e22549..9da303d 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -182,7 +182,7 @@ define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { ; CHECK-LABEL: laneselect_v16i8: ; CHECK-NEXT: .functype laneselect_v16i8 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i8x16.laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: i8x16.relaxed_laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare <16 x i8> @llvm.wasm.laneselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) define <16 x i8> @laneselect_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { @@ -358,7 +358,7 @@ define <8 x i16> @narrow_unsigned_v8i16(<4 x i32> %low, <4 x i32> %high) { ; CHECK-LABEL: laneselect_v8i16: ; CHECK-NEXT: .functype laneselect_v8i16 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i16x8.laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: i16x8.relaxed_laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare <8 x i16> @llvm.wasm.laneselect.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) define <8 x i16> @laneselect_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { @@ -516,7 +516,7 @@ define <4 x i32> @trunc_sat_u_zero_v4i32_2(<2 x double> %x) { ; CHECK-LABEL: laneselect_v4i32: ; CHECK-NEXT: .functype laneselect_v4i32 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i32x4.laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: i32x4.relaxed_laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare <4 x i32> @llvm.wasm.laneselect.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define <4 x i32> @laneselect_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { @@ -615,7 +615,7 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c) { ; CHECK-LABEL: laneselect_v2i64: ; CHECK-NEXT: .functype laneselect_v2i64 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: i64x2.laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: i64x2.relaxed_laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare <2 x i64> @llvm.wasm.laneselect.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) define <2 x i64> @laneselect_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { @@ -702,7 +702,7 @@ define <4 x float> @nearest_v4f32(<4 x float> %a) { ; CHECK-LABEL: fma_v4f32: ; CHECK-NEXT: .functype fma_v4f32 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: f32x4.fma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: f32x4.relaxed_fma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.wasm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) define <4 x float> @fma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { @@ -714,7 +714,7 @@ define <4 x float> @fma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; CHECK-LABEL: fms_v4f32: ; CHECK-NEXT: .functype fms_v4f32 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: f32x4.fms $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: f32x4.relaxed_fms $push[[R:[0-9]+]]=, $0, $1, $2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.wasm.fms.v4f32(<4 x float>, <4 x float>, <4 x float>) define <4 x float> @fms_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { @@ -825,7 +825,7 @@ define <2 x double> @nearest_v2f64(<2 x double> %a) { ; CHECK-LABEL: fma_v2f64: ; CHECK-NEXT: .functype fma_v2f64 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: f64x2.fma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: f64x2.relaxed_fma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.wasm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x double> @fma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { @@ -837,7 +837,7 @@ define <2 x double> @fma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c ; CHECK-LABEL: fms_v2f64: ; CHECK-NEXT: .functype fms_v2f64 (v128, v128, v128) -> (v128){{$}} -; CHECK-NEXT: f64x2.fms $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; CHECK-NEXT: f64x2.relaxed_fms $push[[R:[0-9]+]]=, $0, $1, $2{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.wasm.fms.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x double> @fms_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 9f6c20f..626b235f 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -779,55 +779,59 @@ main: # CHECK: f64x2.convert_low_i32x4_u # encoding: [0xfd,0xff,0x01] f64x2.convert_low_i32x4_u - # CHECK: f32x4.fma # encoding: [0xfd,0xaf,0x01] - f32x4.fma + # CHECK: i8x16.relaxed_swizzle # encoding: [0xfd,0x80,0x02] + i8x16.relaxed_swizzle - # CHECK: f32x4.fms # encoding: [0xfd,0xb0,0x01] - f32x4.fms + # CHECK: i32x4.relaxed_trunc_f32x4_s # encoding: [0xfd,0x81,0x02] + i32x4.relaxed_trunc_f32x4_s - # CHECK: f64x2.fma # encoding: [0xfd,0xcf,0x01] - f64x2.fma + # CHECK: i32x4.relaxed_trunc_f32x4_u # encoding: [0xfd,0x82,0x02] + i32x4.relaxed_trunc_f32x4_u - # CHECK: f64x2.fms # encoding: [0xfd,0xd0,0x01] - f64x2.fms + # CHECK: i32x4.relaxed_trunc_f64x2_s_zero # encoding: [0xfd,0x83,0x02] + i32x4.relaxed_trunc_f64x2_s_zero - # CHECK: i8x16.laneselect # encoding: [0xfd,0xb2,0x01] - i8x16.laneselect + # CHECK: i32x4.relaxed_trunc_f64x2_u_zero # encoding: [0xfd,0x84,0x02] + i32x4.relaxed_trunc_f64x2_u_zero - # CHECK: i16x8.laneselect # encoding: [0xfd,0xb3,0x01] - i16x8.laneselect + # CHECK: f32x4.relaxed_fma # encoding: [0xfd,0x85,0x02] + f32x4.relaxed_fma - # CHECK: i32x4.laneselect # encoding: [0xfd,0xd2,0x01] - i32x4.laneselect + # CHECK: f32x4.relaxed_fms # encoding: [0xfd,0x86,0x02] + f32x4.relaxed_fms - # CHECK: i64x2.laneselect # encoding: [0xfd,0xd3,0x01] - i64x2.laneselect + # CHECK: f64x2.relaxed_fma # encoding: [0xfd,0x87,0x02] + f64x2.relaxed_fma - # CHECK: i8x16.relaxed_swizzle # encoding: [0xfd,0xa2,0x01] - i8x16.relaxed_swizzle + # CHECK: f64x2.relaxed_fms # encoding: [0xfd,0x88,0x02] + f64x2.relaxed_fms + + # CHECK: i8x16.relaxed_laneselect # encoding: [0xfd,0x89,0x02] + i8x16.relaxed_laneselect + + # CHECK: i16x8.relaxed_laneselect # encoding: [0xfd,0x8a,0x02] + i16x8.relaxed_laneselect + + # CHECK: i32x4.relaxed_laneselect # encoding: [0xfd,0x8b,0x02] + i32x4.relaxed_laneselect - # CHECK: f32x4.relaxed_min # encoding: [0xfd,0xb4,0x01] + # CHECK: i64x2.relaxed_laneselect # encoding: [0xfd,0x8c,0x02] + i64x2.relaxed_laneselect + + # CHECK: f32x4.relaxed_min # encoding: [0xfd,0x8d,0x02] f32x4.relaxed_min - # CHECK: f32x4.relaxed_max # encoding: [0xfd,0xe2,0x01] + # CHECK: f32x4.relaxed_max # encoding: [0xfd,0x8e,0x02] f32x4.relaxed_max - # CHECK: f64x2.relaxed_min # encoding: [0xfd,0xd4,0x01] + # CHECK: f64x2.relaxed_min # encoding: [0xfd,0x8f,0x02] f64x2.relaxed_min - # CHECK: f64x2.relaxed_max # encoding: [0xfd,0xee,0x01] + # CHECK: f64x2.relaxed_max # encoding: [0xfd,0x90,0x02] f64x2.relaxed_max - # CHECK: i32x4.relaxed_trunc_f32x4_s # encoding: [0xfd,0xa5,0x01] - i32x4.relaxed_trunc_f32x4_s - - # CHECK: i32x4.relaxed_trunc_f32x4_u # encoding: [0xfd,0xa6,0x01] - i32x4.relaxed_trunc_f32x4_u - - # CHECK: i32x4.relaxed_trunc_f64x2_s_zero # encoding: [0xfd,0xc5,0x01] - i32x4.relaxed_trunc_f64x2_s_zero - - # CHECK: i32x4.relaxed_trunc_f64x2_u_zero # encoding: [0xfd,0xc6,0x01] - i32x4.relaxed_trunc_f64x2_u_zero + # TODO: i16x8.relaxed_q15mulr_s # encoding: [0xfd,0x91,0x02] + # TODO: i16x8.dot_i8x16_i7x16_s # encoding: [0xfd,0x92,0x02] + # TODO: i32x4.dot_i8x16_i7x16_add_s # encoding: [0xfd,0x93,0x02] end_function -- 2.7.4