From: Stella Stamenova
Date: Fri, 2 Oct 2020 16:26:21 +0000 (-0700)
Subject: Revert "[WebAssembly] Emulate v128.const efficiently"
X-Git-Tag: llvmorg-13-init~10280
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=432e4e56d3d25c209b3336655aa374095e695956;p=platform%2Fupstream%2Fllvm.git

Revert "[WebAssembly] Emulate v128.const efficiently"

This reverts commit 542523a61a21c13e7f244bcf821b0fdeb8c6bb24.
---

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 8474e50..425f8b8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -30,7 +30,6 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
@@ -1566,7 +1565,6 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     };
   } else if (NumConstantLanes >= NumSplatLanes &&
              Subtarget->hasUnimplementedSIMD128()) {
-    // If we support v128.const, emit it directly
     SmallVector<SDValue, 16> ConstLanes;
     for (const SDValue &Lane : Op->op_values()) {
       if (IsConstant(Lane)) {
@@ -1578,67 +1576,11 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
       }
     }
     Result = DAG.getBuildVector(VecT, DL, ConstLanes);
-    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
+    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
       return IsConstant(Lane);
     };
-  } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) {
-    // Otherwise, if this is an integer vector, pack the lane values together so
-    // we can construct the 128-bit constant from a pair of i64s using a splat
-    // followed by at most one i64x2.replace_lane. Also keep track of the lanes
-    // that actually matter so we can avoid the replace_lane in more cases.
-    std::array<uint64_t, 2> I64s({0, 0});
-    std::array<uint64_t, 2> ConstLaneMasks({0, 0});
-    uint8_t *I64Bytes = reinterpret_cast<uint8_t *>(I64s.data());
-    uint8_t *MaskBytes = reinterpret_cast<uint8_t *>(ConstLaneMasks.data());
-    unsigned I = 0;
-    size_t ByteStep = VecT.getScalarSizeInBits() / 8;
-    for (const SDValue &Lane : Op->op_values()) {
-      if (IsConstant(Lane)) {
-        using llvm::support::little;
-        using llvm::support::endian::byte_swap;
-        // The endianness of the compiler matters here. We want to enforce
-        // little endianness so that the bytes of a smaller integer type will
-        // occur first in the uint64_t.
-        auto *Const = cast<ConstantSDNode>(Lane.getNode());
-        uint64_t Val = byte_swap(Const->getLimitedValue(), little);
-        uint8_t *ValPtr = reinterpret_cast<uint8_t *>(&Val);
-        std::copy(ValPtr, ValPtr + ByteStep, I64Bytes + I * ByteStep);
-        uint64_t Mask = uint64_t(-1LL);
-        uint8_t *MaskPtr = reinterpret_cast<uint8_t *>(&Mask);
-        std::copy(MaskPtr, MaskPtr + ByteStep, MaskBytes + I * ByteStep);
-      }
-      ++I;
-    }
-    // Check whether all constant lanes in the second half of the vector are
-    // equivalent in the first half or vice versa to determine whether splatting
-    // either side will be sufficient to materialize the constant. As a special
-    // case, if the first and second halves have no constant lanes in common, we
-    // can just combine them.
-    bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1];
-    bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0];
-    bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0;
-
-    uint64_t Splatted;
-    if (SecondHalfSufficient) {
-      Splatted = I64s[1];
-    } else if (CombinedSufficient) {
-      Splatted = I64s[0] | I64s[1];
-    } else {
-      Splatted = I64s[0];
-    }
-
-    Result = DAG.getSplatBuildVector(MVT::v2i64, DL,
-                                     DAG.getConstant(Splatted, DL, MVT::i64));
-    if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) {
-      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result,
-                           DAG.getConstant(I64s[1], DL, MVT::i64),
-                           DAG.getConstant(1, DL, MVT::i32));
-    }
-    Result = DAG.getBitcast(VecT, Result);
-    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
-      return IsConstant(Lane);
-    };
-  } else {
+  }
+  if (!Result) {
     // Use a splat, but possibly a load_splat
     LoadSDNode *SplattedLoad;
     if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
@@ -1651,14 +1593,11 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     } else {
       Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
     }
-    IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
+    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
       return Lane == SplatValue;
     };
   }

-  assert(Result);
-  assert(IsLaneConstructed);
-
   // Add replace_lane instructions for any unhandled values
   for (size_t I = 0; I < Lanes; ++I) {
     const SDValue &Lane = Op->getOperand(I);
diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
index afd7375..43cfa97 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
@@ -8,73 +8,12 @@
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"

-; CHECK-LABEL: emulated_const_trivial_splat:
-; CHECK-NEXT: .functype emulated_const_trivial_splat () -> (v128)
-; SIMD-VM-NEXT: i64.const $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
-; SIMD-VM-NEXT: return $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_trivial_splat() {
-  ret <4 x i32> <i32 1, i32 2, i32 1, i32 2>
-}
-
-; CHECK-LABEL: emulated_const_first_sufficient:
-; CHECK-NEXT: .functype emulated_const_first_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
-; SIMD-VM-NEXT: return $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_first_sufficient() {
-  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 2>
-}
-
-; CHECK-LABEL: emulated_const_second_sufficient:
-; CHECK-NEXT: .functype emulated_const_second_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
-; SIMD-VM-NEXT: return $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_second_sufficient() {
-  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 2>
-}
-
-; CHECK-LABEL: emulated_const_combined_sufficient:
-; CHECK-NEXT: .functype emulated_const_combined_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
-; SIMD-VM-NEXT: return $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_combined_sufficient() {
-  ret <4 x i32> <i32 1, i32 undef, i32 undef, i32 2>
-}
-
-; CHECK-LABEL: emulated_const_either_sufficient:
-; CHECK-NEXT: .functype emulated_const_either_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const $push0=, 1
-; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
-; SIMD-VM-NEXT: return $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_either_sufficient() {
-  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 undef>
-}
-
-; CHECK-LABEL: emulated_const_neither_sufficient:
-; CHECK-NEXT: .functype emulated_const_neither_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
-; SIMD-VM-NEXT: i64.const $push2=, 17179869184
-; SIMD-VM-NEXT: i64x2.replace_lane $push3=, $pop1, 1, $pop2
-; SIMD-VM-NEXT: return $pop3
-define <4 x i32> @emulated_const_neither_sufficient() {
-  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 4>
-}
-
 ; CHECK-LABEL: same_const_one_replaced_i16x8:
 ; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
 ; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
 ; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
 ; UNIMP-NEXT: return $pop[[L1]]
-; SIMD-VM: i64x2.splat
+; SIMD-VM: i16x8.splat
 define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
   %v = insertelement
     <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
@@ -88,7 +27,7 @@ define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
 ; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
 ; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
 ; UNIMP-NEXT: return $pop[[L1]]
-; SIMD-VM: i64x2.splat
+; SIMD-VM: i16x8.splat
 define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
   %v = insertelement
     <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
@@ -129,7 +68,7 @@ define <4 x float> @different_const_one_replaced_f32x4(float %x) {
 ; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128)
 ; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
 ; UNIMP-NEXT: return $pop[[L0]]
-; SIMD-VM: i64x2.splat
+; SIMD-VM: i32x4.splat
 define <4 x i32> @splat_common_const_i32x4() {
   ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
 }
@@ -267,7 +206,7 @@ define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %spla
 ; UNIMP: i8x16.replace_lane
 ; UNIMP: i8x16.replace_lane
 ; UNIMP: return
-; SIMD-VM: i64x2.splat
+; SIMD-VM: i8x16.splat
 define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
 ; swizzle 0
   %m0 = extractelement <16 x i8> %mask, i32 0
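
For reference, the reverted lowering packed the constant lanes of an integer
vector into two little-endian i64s, plus masks recording which bytes came from
constant lanes, and then materialized the vector with one i64x2.splat and at
most one i64x2.replace_lane. The following standalone sketch reproduces that
decision logic; it is not LLVM code, the names Packed and pack are invented for
illustration, and it assumes a little-endian host in place of the patch's
byte_swap(..., little):

    // Standalone sketch of the reverted packing logic (not LLVM code).
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <optional>
    #include <vector>

    struct Packed {
      uint64_t I64s[2] = {0, 0};  // lane bytes packed little-endian into halves
      uint64_t Masks[2] = {0, 0}; // which bytes came from constant lanes
    };

    // Each lane is either a known constant or empty (undef / non-constant).
    // LaneBytes is the lane width in bytes: 1, 2, 4, or 8.
    static Packed pack(const std::vector<std::optional<uint64_t>> &Lanes,
                       size_t LaneBytes) {
      Packed P;
      uint8_t *Bytes = reinterpret_cast<uint8_t *>(P.I64s);
      uint8_t *MaskBytes = reinterpret_cast<uint8_t *>(P.Masks);
      for (size_t I = 0; I < Lanes.size(); ++I) {
        if (!Lanes[I])
          continue; // unknown lanes leave zeros and a zero mask
        uint64_t Val = *Lanes[I];
        uint64_t Mask = ~uint64_t(0);
        std::memcpy(Bytes + I * LaneBytes, &Val, LaneBytes);
        std::memcpy(MaskBytes + I * LaneBytes, &Mask, LaneBytes);
      }
      return P;
    }

    int main() {
      // <4 x i32> <i32 1, i32 2, i32 undef, i32 4>: neither half covers the other.
      Packed P = pack({1, 2, std::nullopt, 4}, 4);

      // The same three checks the reverted LowerBUILD_VECTOR performed.
      bool FirstHalfSufficient = (P.I64s[0] & P.Masks[1]) == P.I64s[1];
      bool SecondHalfSufficient = (P.I64s[1] & P.Masks[0]) == P.I64s[0];
      bool CombinedSufficient = (P.Masks[0] & P.Masks[1]) == 0;

      uint64_t Splatted = SecondHalfSufficient ? P.I64s[1]
                          : CombinedSufficient ? (P.I64s[0] | P.I64s[1])
                                               : P.I64s[0];
      std::printf("i64x2.splat %llu\n", (unsigned long long)Splatted);
      if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient)
        std::printf("i64x2.replace_lane 1, %llu\n",
                    (unsigned long long)P.I64s[1]);
      // Prints 8589934593 then 17179869184, matching the removed
      // emulated_const_neither_sufficient test above.
      return 0;
    }

The masks are what let undef and non-constant lanes avoid vetoing a splat: only
bytes that actually came from constant lanes participate in the
half-equivalence comparisons.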