From 0376ac1d946466eb346c2055554153e11b0fc3cf Mon Sep 17 00:00:00 2001
From: Piotr Sobczak
Date: Fri, 5 Apr 2019 07:44:09 +0000
Subject: [PATCH] [SelectionDAG] Compute known bits of CopyFromReg

Summary:
Teach SelectionDAG how to compute known bits of ISD::CopyFromReg when
the virtual register used has only one definition.

This can be particularly useful when calling isBaseWithConstantOffset()
on an ISD::CopyFromReg argument, as more optimizations may be enabled
as a result.

Also add a missing truncation on X86, found while testing this patch.

Change-Id: Id1c9fceec862d118c54a5b53adf72ada5d6daefa

Reviewers: bogner, craig.topper, RKSimon

Reviewed By: RKSimon

Subscribers: lebedev.ri, nemanjai, jvesely, nhaehnle, javed.absar, jsji, jdoerfert, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59535

llvm-svn: 357745
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp       | 20 ++++++++++++++++++++
 llvm/lib/Target/X86/X86ISelLowering.cpp              |  6 +++---
 .../test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll | 10 ++++------
 llvm/test/CodeGen/ARM/atomic-op.ll                   |  8 ++++----
 llvm/test/CodeGen/PowerPC/pr35688.ll                 | 16 +++++++---------
 llvm/test/CodeGen/SystemZ/subregliveness-04.ll       |  2 +-
 llvm/test/CodeGen/X86/fold-tied-op.ll                |  2 +-
 llvm/test/CodeGen/X86/pr28444.ll                     |  5 ++---
 8 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 87ace6f..148a380 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,6 +31,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -3202,6 +3203,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.One &= Known2.One;
     break;
   }
+  case ISD::CopyFromReg: {
+    auto R = cast<RegisterSDNode>(Op.getOperand(1));
+    const unsigned Reg = R->getReg();
+
+    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+    if (!TRI->isVirtualRegister(Reg))
+      break;
+
+    const MachineRegisterInfo *MRI = &MF->getRegInfo();
+    if (!MRI->hasOneDef(Reg))
+      break;
+
+    const FunctionLoweringInfo::LiveOutInfo *LOI = FLI->GetLiveOutRegInfo(Reg);
+    if (!LOI || LOI->Known.getBitWidth() != BitWidth)
+      break;
+
+    Known = LOI->Known;
+    break;
+  }
   case ISD::FrameIndex:
   case ISD::TargetFrameIndex:
     TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 061a7d9..f1047ae 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19580,10 +19580,10 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
       DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
     Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);

-  // If the operand types disagree, extend the shift amount to match. Since
-  // BT ignores high bits (like shifts) we can use anyextend.
+  // If the operand types disagree, extend or truncate the shift amount to match.
+  // Since BT ignores high bits (like shifts) we can use anyextend for the extension.
if (Src.getValueType() != BitNo.getValueType()) - BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); + BitNo = DAG.getAnyExtOrTrunc(BitNo, dl, Src.getValueType()); X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, dl, MVT::i8); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll index 1c450e7..4e85ca0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll @@ -110,8 +110,8 @@ main_body: ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb: ;CHECK-NOT: s_waitcnt; -;CHECK: v_or_b32 -;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen +;CHECK-NOT: v_or_b32 +;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8 define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) { main_body: %tmp = shl i32 %index, 4 @@ -127,10 +127,8 @@ bb1: ; preds = %main_body ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb_merged: ;CHECK-NOT: s_waitcnt; -;CHECK: v_or_b32 -;CHECK: v_or_b32 -;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen -;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen +;CHECK-NOT: v_or_b32 +;CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8 define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) { main_body: %tmp = shl i32 %index, 4 diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll index 8ab2026..5f206b6 100644 --- a/llvm/test/CodeGen/ARM/atomic-op.ll +++ b/llvm/test/CodeGen/ARM/atomic-op.ll @@ -183,11 +183,11 @@ entry: ret void } -define void @func2() nounwind { +define void @func2(i16 %int_val) nounwind { entry: %val = alloca i16 %old = alloca i16 - store i16 31, i16* %val + store i16 %int_val, i16* %val ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -197,7 +197,7 @@ entry: ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw umin i16* %val, i16 16 monotonic store i16 %0, i16* %old - %uneg = sub i16 0, 1 + %uneg = sub i16 0, 2 ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -249,7 +249,7 @@ entry: ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync - %uneg = sub i8 0, 1 + %uneg = sub i8 0, 2 %1 = atomicrmw umin i8* %val, i8 %uneg monotonic store i8 %1, i8* %old ; CHECK: ldrex diff --git a/llvm/test/CodeGen/PowerPC/pr35688.ll b/llvm/test/CodeGen/PowerPC/pr35688.ll index 098573e..7573e87 100644 --- a/llvm/test/CodeGen/PowerPC/pr35688.ll +++ b/llvm/test/CodeGen/PowerPC/pr35688.ll @@ -6,16 +6,14 @@ ; Function Attrs: nounwind define void @ec_GFp_nistp256_points_mul() { ; CHECK-LABEL: ec_GFp_nistp256_points_mul: -; CHECK: ld 5, 0(3) -; CHECK: li 3, 127 -; CHECK: li 4, 0 -; CHECK: subfic 6, 5, 0 -; CHECK: subfze 6, 4 -; CHECK: sradi 7, 6, 63 -; CHECK: srad 6, 6, 3 -; CHECK: subfc 5, 5, 7 -; CHECK: subfe 5, 4, 6 +; CHECK: ld 4, 0(3) +; CHECK: li 3, 0 +; CHECK: subfic 5, 4, 0 +; CHECK: subfze 5, 3 ; CHECK: sradi 5, 5, 63 +; CHECK: subfc 4, 4, 5 +; CHECK: subfe 4, 3, 5 +; CHECK: sradi 4, 4, 63 ; With MemorySSA, everything is taken out of the loop by licm. ; Loads and stores to undef are treated as non-aliasing. 
diff --git a/llvm/test/CodeGen/SystemZ/subregliveness-04.ll b/llvm/test/CodeGen/SystemZ/subregliveness-04.ll index 11ecc9b..cb9ae9b 100644 --- a/llvm/test/CodeGen/SystemZ/subregliveness-04.ll +++ b/llvm/test/CodeGen/SystemZ/subregliveness-04.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp -systemz-subreg-liveness < %s | FileCheck %s ; Check for successful compilation. -; CHECK: lhi %r0, -5 +; CHECK: lhi {{%r[0-9]+}}, -5 target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" target triple = "s390x-ibm-linux" diff --git a/llvm/test/CodeGen/X86/fold-tied-op.ll b/llvm/test/CodeGen/X86/fold-tied-op.ll index eb06eb7..6fe1713 100644 --- a/llvm/test/CodeGen/X86/fold-tied-op.ll +++ b/llvm/test/CodeGen/X86/fold-tied-op.ll @@ -6,8 +6,8 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" target triple = "i386--netbsd" ; CHECK-LABEL: fn1 -; CHECK: orl {{.*#+}} 4-byte Folded Reload ; CHECK: addl {{.*#+}} 4-byte Folded Reload +; CHECK: orl {{.*#+}} 4-byte Folded Reload ; CHECK: xorl {{.*#+}} 4-byte Folded Reload ; CHECK: xorl {{.*#+}} 4-byte Folded Reload ; CHECK: retl diff --git a/llvm/test/CodeGen/X86/pr28444.ll b/llvm/test/CodeGen/X86/pr28444.ll index 2338320..4d7d08a 100644 --- a/llvm/test/CodeGen/X86/pr28444.ll +++ b/llvm/test/CodeGen/X86/pr28444.ll @@ -11,9 +11,8 @@ define void @extractelt_mismatch_vector_element_type(i32 %arg, i1 %x) { ; CHECK-LABEL: extractelt_mismatch_vector_element_type: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: movb %al, (%rax) -; CHECK-NEXT: movb %al, (%rax) +; CHECK-NEXT: movb $1, (%rax) +; CHECK-NEXT: movb $1, (%rax) ; CHECK-NEXT: retq bb: %tmp = icmp ult i32 %arg, 0 -- 2.7.4
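
Illustration (not part of the patch): below is a minimal, hypothetical
consumer sketch showing why the known-bits change matters for
isBaseWithConstantOffset(), as mentioned in the summary. The helper name
canFoldOrIntoOffset and its setting are assumptions for the example; it
only relies on the in-tree SelectionDAG APIs it calls. Before this
patch, a base value that reaches such a check as ISD::CopyFromReg from
another basic block reported no known bits, so an OR of that base with a
small constant could not be proven disjoint and treated as an ADD; with
the patch, the live-out known bits of the register's single def are
reused, which is what the AMDGPU test above checks (buffer_load_dword
with an offset:8 immediate instead of a v_or_b32).

  // Hypothetical helper, for illustration only -- not part of this commit.
  // Assumes it is built inside LLVM, e.g. next to a target's ISelLowering code.
  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGen/SelectionDAG.h"

  using namespace llvm;

  // Return true if 'Ptr', expected to be (or Base, Const), can be treated as
  // (add Base, Const), e.g. so the constant can become an addressing-mode
  // offset. isBaseWithConstantOffset() proves this by asking computeKnownBits()
  // whether the constant's set bits are known zero in Base; when Base is an
  // ISD::CopyFromReg of a single-def virtual register, this patch lets that
  // query succeed by reusing the live-out known bits recorded for the def.
  static bool canFoldOrIntoOffset(SelectionDAG &DAG, SDValue Ptr) {
    if (Ptr.getOpcode() != ISD::OR)
      return false;
    return DAG.isBaseWithConstantOffset(Ptr);
  }

In the patch itself the cached information comes from
FunctionLoweringInfo::GetLiveOutRegInfo(); restricting the lookup to
virtual registers with a single def keeps those live-out known bits
valid at every use of the register.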