From 0861c87b06c5573d919ec550bbbd2a1624d22ba0 Mon Sep 17 00:00:00 2001
From: David Green
Date: Wed, 10 Apr 2019 18:00:41 +0000
Subject: [PATCH] Revert rL357745: [SelectionDAG] Compute known bits of CopyFromReg

Certain optimisations from ConstantHoisting and CGP rely on Selection DAG
not seeing through to the constant in other blocks. Revert this patch
while we come up with a better way to handle that.

I will try to follow this up with some better tests.

llvm-svn: 358113
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp       | 20 --------------------
 llvm/lib/Target/X86/X86ISelLowering.cpp              |  6 +++---
 .../test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll | 10 ++++++----
 llvm/test/CodeGen/ARM/atomic-op.ll                   |  8 ++++----
 llvm/test/CodeGen/PowerPC/pr35688.ll                 | 16 +++++++++-------
 llvm/test/CodeGen/SystemZ/subregliveness-04.ll       |  2 +-
 llvm/test/CodeGen/X86/fold-tied-op.ll                |  2 +-
 llvm/test/CodeGen/X86/pr28444.ll                     |  5 +++--
 8 files changed, 27 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index caa3f14..c017d6d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,7 +31,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -3208,25 +3207,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.One &= Known2.One;
     break;
   }
-  case ISD::CopyFromReg: {
-    auto R = cast<RegisterSDNode>(Op.getOperand(1));
-    const unsigned Reg = R->getReg();
-
-    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-    if (!TRI->isVirtualRegister(Reg))
-      break;
-
-    const MachineRegisterInfo *MRI = &MF->getRegInfo();
-    if (!MRI->hasOneDef(Reg))
-      break;
-
-    const FunctionLoweringInfo::LiveOutInfo *LOI = FLI->GetLiveOutRegInfo(Reg);
-    if (!LOI || LOI->Known.getBitWidth() != BitWidth)
-      break;
-
-    Known = LOI->Known;
-    break;
-  }
   case ISD::FrameIndex:
   case ISD::TargetFrameIndex:
     TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f5db87..190d570 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19595,10 +19595,10 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
       DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
     Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
 
-  // If the operand types disagree, extend or truncate the shift amount to match.
-  // Since BT ignores high bits (like shifts) we can use anyextend for the extension.
+  // If the operand types disagree, extend the shift amount to match. Since
+  // BT ignores high bits (like shifts) we can use anyextend.
   if (Src.getValueType() != BitNo.getValueType())
-    BitNo = DAG.getAnyExtOrTrunc(BitNo, dl, Src.getValueType());
+    BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
 
   X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
                           dl, MVT::i8);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
index 4e85ca0..1c450e7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
@@ -110,8 +110,8 @@ main_body:
 
 ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb:
 ;CHECK-NOT: s_waitcnt;
-;CHECK-NOT: v_or_b32
-;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
+;CHECK: v_or_b32
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
 main_body:
   %tmp = shl i32 %index, 4
@@ -127,8 +127,10 @@ bb1:                                              ; preds = %main_body
 
 ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
 ;CHECK-NOT: s_waitcnt;
-;CHECK-NOT: v_or_b32
-;CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
+;CHECK: v_or_b32
+;CHECK: v_or_b32
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
 main_body:
   %tmp = shl i32 %index, 4
diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll
index 5f206b6..8ab2026 100644
--- a/llvm/test/CodeGen/ARM/atomic-op.ll
+++ b/llvm/test/CodeGen/ARM/atomic-op.ll
@@ -183,11 +183,11 @@ entry:
   ret void
 }
 
-define void @func2(i16 %int_val) nounwind {
+define void @func2() nounwind {
 entry:
   %val = alloca i16
   %old = alloca i16
-  store i16 %int_val, i16* %val
+  store i16 31, i16* %val
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -197,7 +197,7 @@ entry:
   ; CHECK-BAREMETAL-NOT: __sync
   %0 = atomicrmw umin i16* %val, i16 16 monotonic
   store i16 %0, i16* %old
-  %uneg = sub i16 0, 2
+  %uneg = sub i16 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -249,7 +249,7 @@ entry:
   ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
   ; CHECK-BAREMETAL: cmp
   ; CHECK-BAREMETAL-NOT: __sync
-  %uneg = sub i8 0, 2
+  %uneg = sub i8 0, 1
   %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
   store i8 %1, i8* %old
   ; CHECK: ldrex
diff --git a/llvm/test/CodeGen/PowerPC/pr35688.ll b/llvm/test/CodeGen/PowerPC/pr35688.ll
index 7573e87..098573e 100644
--- a/llvm/test/CodeGen/PowerPC/pr35688.ll
+++ b/llvm/test/CodeGen/PowerPC/pr35688.ll
@@ -6,14 +6,16 @@
 ; Function Attrs: nounwind
 define void @ec_GFp_nistp256_points_mul() {
 ; CHECK-LABEL: ec_GFp_nistp256_points_mul:
-; CHECK: ld 4, 0(3)
-; CHECK: li 3, 0
-; CHECK: subfic 5, 4, 0
-; CHECK: subfze 5, 3
+; CHECK: ld 5, 0(3)
+; CHECK: li 3, 127
+; CHECK: li 4, 0
+; CHECK: subfic 6, 5, 0
+; CHECK: subfze 6, 4
+; CHECK: sradi 7, 6, 63
+; CHECK: srad 6, 6, 3
+; CHECK: subfc 5, 5, 7
+; CHECK: subfe 5, 4, 6
 ; CHECK: sradi 5, 5, 63
-; CHECK: subfc 4, 4, 5
-; CHECK: subfe 4, 3, 5
-; CHECK: sradi 4, 4, 63
 
 ; With MemorySSA, everything is taken out of the loop by licm.
 ; Loads and stores to undef are treated as non-aliasing.
diff --git a/llvm/test/CodeGen/SystemZ/subregliveness-04.ll b/llvm/test/CodeGen/SystemZ/subregliveness-04.ll
index cb9ae9b..11ecc9b 100644
--- a/llvm/test/CodeGen/SystemZ/subregliveness-04.ll
+++ b/llvm/test/CodeGen/SystemZ/subregliveness-04.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp -systemz-subreg-liveness < %s | FileCheck %s
 
 ; Check for successful compilation.
-; CHECK: lhi {{%r[0-9]+}}, -5
+; CHECK: lhi %r0, -5
 
 target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
 target triple = "s390x-ibm-linux"
diff --git a/llvm/test/CodeGen/X86/fold-tied-op.ll b/llvm/test/CodeGen/X86/fold-tied-op.ll
index 6fe1713..eb06eb7 100644
--- a/llvm/test/CodeGen/X86/fold-tied-op.ll
+++ b/llvm/test/CodeGen/X86/fold-tied-op.ll
@@ -6,8 +6,8 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 target triple = "i386--netbsd"
 
 ; CHECK-LABEL: fn1
-; CHECK: addl {{.*#+}} 4-byte Folded Reload
 ; CHECK: orl {{.*#+}} 4-byte Folded Reload
+; CHECK: addl {{.*#+}} 4-byte Folded Reload
 ; CHECK: xorl {{.*#+}} 4-byte Folded Reload
 ; CHECK: xorl {{.*#+}} 4-byte Folded Reload
 ; CHECK: retl
diff --git a/llvm/test/CodeGen/X86/pr28444.ll b/llvm/test/CodeGen/X86/pr28444.ll
index 4d7d08a..2338320 100644
--- a/llvm/test/CodeGen/X86/pr28444.ll
+++ b/llvm/test/CodeGen/X86/pr28444.ll
@@ -11,8 +11,9 @@
 define void @extractelt_mismatch_vector_element_type(i32 %arg, i1 %x) {
 ; CHECK-LABEL: extractelt_mismatch_vector_element_type:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    movb $1, (%rax)
-; CHECK-NEXT:    movb $1, (%rax)
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    movb %al, (%rax)
+; CHECK-NEXT:    movb %al, (%rax)
 ; CHECK-NEXT:    retq
 bb:
   %tmp = icmp ult i32 %arg, 0
-- 
2.7.4
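
A rough sketch of the kind of follow-up test mentioned above. This is
hypothetical IR, not part of the commit; the function name, values, and
constant are invented for illustration. ConstantHoisting materialises an
expensive constant once (via the identity bitcast it emits) and reuses it
from other blocks; computing known bits through CopyFromReg let ISel see
a fully-known value in those blocks and fold the constant back in per
use, undoing the hoist.

; Hypothetical reduced test; names and constants are invented.
define void @hoisted_const_reuse(i32* %p, i1 %c) {
entry:
  ; ConstantHoisting's materialisation point: an identity bitcast that
  ; keeps one copy of the constant in a register for reuse below.
  %const = bitcast i32 -2147483647 to i32
  br i1 %c, label %bb1, label %bb2

bb1:                                              ; preds = %entry
  ; %const arrives here through a CopyFromReg. With rL357745, its bits
  ; were fully known here, re-exposing the constant across the block
  ; boundary.
  store i32 %const, i32* %p
  ret void

bb2:                                              ; preds = %entry
  %v = add i32 %const, 1
  store i32 %v, i32* %p
  ret void
}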