From 546d002d7a5d0915ab449a3134b9737edc059378 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 5 Nov 2020 11:52:07 +0000
Subject: [PATCH] [GlobalISel] ComputeKnownBits - use common KnownBits shift
 handling (PR44526)

Convert GISelKnownBits.computeKnownBitsImpl shift handling to use the
common KnownBits implementations, which make use of the known
leading/trailing bits for shifted values in cases where we don't know
the shift amount value, as detailed in
https://blog.regehr.org/archives/1709

Differential Revision: https://reviews.llvm.org/D90527
---
 llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 57 ++++++++-----------
 llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll    | 76 ++++++++------------
 2 files changed, 45 insertions(+), 88 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 862764f..97aae2d 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -369,44 +369,31 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
     Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
     break;
   }
-  case TargetOpcode::G_ASHR:
-  case TargetOpcode::G_LSHR:
-  case TargetOpcode::G_SHL: {
-    KnownBits RHSKnown;
+  case TargetOpcode::G_ASHR: {
+    KnownBits LHSKnown, RHSKnown;
+    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+                         Depth + 1);
     computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                          Depth + 1);
-    if (!RHSKnown.isConstant()) {
-      LLVM_DEBUG(
-          MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg());
-          dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI);
-      break;
-    }
-    uint64_t Shift = RHSKnown.getConstant().getZExtValue();
-    LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n');
-
-    // Guard against oversized shift amounts
-    if (Shift >= MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits())
-      break;
-
-    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+    Known = KnownBits::ashr(LHSKnown, RHSKnown);
+    break;
+  }
+  case TargetOpcode::G_LSHR: {
+    KnownBits LHSKnown, RHSKnown;
+    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                          Depth + 1);
-
-    switch (Opcode) {
-    case TargetOpcode::G_ASHR:
-      Known.Zero = Known.Zero.ashr(Shift);
-      Known.One = Known.One.ashr(Shift);
-      break;
-    case TargetOpcode::G_LSHR:
-      Known.Zero = Known.Zero.lshr(Shift);
-      Known.One = Known.One.lshr(Shift);
-      Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift);
-      break;
-    case TargetOpcode::G_SHL:
-      Known.Zero = Known.Zero.shl(Shift);
-      Known.One = Known.One.shl(Shift);
-      Known.Zero.setBits(0, Shift);
-      break;
-    }
+    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+                         Depth + 1);
+    Known = KnownBits::lshr(LHSKnown, RHSKnown);
+    break;
+  }
+  case TargetOpcode::G_SHL: {
+    KnownBits LHSKnown, RHSKnown;
+    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+                         Depth + 1);
+    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+                         Depth + 1);
+    Known = KnownBits::shl(LHSKnown, RHSKnown);
     break;
   }
   case TargetOpcode::G_INTTOPTR:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 0b9a514..7fca201 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -549,8 +549,6 @@ define amdgpu_ps i32 @s_lshr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
 ; GFX6-NEXT:    s_and_b32 s2, s3, s4
 ; GFX6-NEXT:    s_and_b32 s1, s1, s4
 ; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
-; GFX6-NEXT:    s_and_b32 s1, s1, s4
-; GFX6-NEXT:    s_and_b32 s0, s0, s4
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT:    s_or_b32 s0, s0, s1
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -592,8 +590,6 @@ define amdgpu_ps float @lshr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount)
 ; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
 ; GFX6-NEXT:    s_and_b32 s0, s1, s2
 ; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
-; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
-; GFX6-NEXT:    v_and_b32_e32 v0, s2, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -626,8 +622,6 @@ define amdgpu_ps float @lshr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount)
 ; GFX6-NEXT:    s_and_b32 s0, s1, s2
 ; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s0, v1
-; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
-; GFX6-NEXT:    v_and_b32_e32 v0, s2, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -674,18 +668,14 @@ define <2 x float> @v_lshr_v4i16(<4 x i16> %value, <4 x i16> %amount) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v4, v1
 ; GFX6-NEXT:    v_and_b32_e32 v4, s4, v6
 ; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
-; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_and_b32_e32 v4, s4, v7
 ; GFX6-NEXT:    v_and_b32_e32 v3, s4, v3
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
-; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
-; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
+; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_lshr_v4i16:
@@ -722,18 +712,14 @@ define amdgpu_ps <2 x i32> @s_lshr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
 ; GFX6-NEXT:    s_lshr_b32 s1, s1, s4
 ; GFX6-NEXT:    s_and_b32 s4, s6, s8
 ; GFX6-NEXT:    s_and_b32 s2, s2, s8
-; GFX6-NEXT:    s_and_b32 s1, s1, s8
 ; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
 ; GFX6-NEXT:    s_and_b32 s4, s7, s8
 ; GFX6-NEXT:    s_and_b32 s3, s3, s8
-; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
-; GFX6-NEXT:    s_and_b32 s0, s0, s8
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
 ; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_and_b32 s1, s2, s8
-; GFX6-NEXT:    s_and_b32 s2, s3, s8
-; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
-; GFX6-NEXT:    s_or_b32 s1, s1, s2
+; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
+; GFX6-NEXT:    s_or_b32 s1, s2, s1
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_lshr_v4i16:
@@ -816,36 +802,28 @@ define <4 x float> @v_lshr_v8i16(<8 x i16> %value, <8 x i16> %amount) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v8, v2
 ; GFX6-NEXT:    v_and_b32_e32 v8, s4, v11
 ; GFX6-NEXT:    v_and_b32_e32 v3, s4, v3
-; GFX6-NEXT:    v_mov_b32_e32 v16, 0xffff
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v8, v3
 ; GFX6-NEXT:    v_and_b32_e32 v8, s4, v12
 ; GFX6-NEXT:    v_and_b32_e32 v4, s4, v4
-; GFX6-NEXT:    v_and_b32_e32 v1, v1, v16
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
 ; GFX6-NEXT:    v_and_b32_e32 v8, s4, v13
 ; GFX6-NEXT:    v_and_b32_e32 v5, s4, v5
+; GFX6-NEXT:    v_mov_b32_e32 v16, 0xffff
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v5, v8, v5
 ; GFX6-NEXT:    v_and_b32_e32 v8, s4, v14
 ; GFX6-NEXT:    v_and_b32_e32 v6, s4, v6
-; GFX6-NEXT:    v_and_b32_e32 v0, v0, v16
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v6, v8, v6
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_and_b32_e32 v1, v2, v16
-; GFX6-NEXT:    v_and_b32_e32 v2, v3, v16
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_and_b32_e32 v8, v15, v16
 ; GFX6-NEXT:    v_and_b32_e32 v7, v7, v16
-; GFX6-NEXT:    v_and_b32_e32 v3, v5, v16
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v7, v8, v7
-; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT:    v_and_b32_e32 v2, v4, v16
-; GFX6-NEXT:    v_and_b32_e32 v4, v7, v16
-; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX6-NEXT:    v_and_b32_e32 v3, v6, v16
-; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
+; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
+; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
+; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
+; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
+; GFX6-NEXT:    v_or_b32_e32 v3, v6, v3
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_lshr_v8i16:
@@ -896,32 +874,24 @@ define amdgpu_ps <4 x i32> @s_lshr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
 ; GFX6-NEXT:    s_lshr_b32 s3, s3, s8
 ; GFX6-NEXT:    s_and_b32 s8, s12, s16
 ; GFX6-NEXT:    s_and_b32 s4, s4, s16
-; GFX6-NEXT:    s_and_b32 s1, s1, s16
 ; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
 ; GFX6-NEXT:    s_and_b32 s8, s13, s16
 ; GFX6-NEXT:    s_and_b32 s5, s5, s16
 ; GFX6-NEXT:    s_lshr_b32 s5, s5, s8
 ; GFX6-NEXT:    s_and_b32 s8, s14, s16
 ; GFX6-NEXT:    s_and_b32 s6, s6, s16
-; GFX6-NEXT:    s_and_b32 s0, s0, s16
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT:    s_lshr_b32 s6, s6, s8
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_and_b32 s1, s2, s16
-; GFX6-NEXT:    s_and_b32 s2, s3, s16
+; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT:    s_and_b32 s8, s15, s16
 ; GFX6-NEXT:    s_and_b32 s7, s7, s16
-; GFX6-NEXT:    s_and_b32 s3, s5, s16
-; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
+; GFX6-NEXT:    s_or_b32 s0, s0, s1
+; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
 ; GFX6-NEXT:    s_lshr_b32 s7, s7, s8
-; GFX6-NEXT:    s_or_b32 s1, s1, s2
-; GFX6-NEXT:    s_and_b32 s2, s4, s16
-; GFX6-NEXT:    s_and_b32 s4, s7, s16
-; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
-; GFX6-NEXT:    s_or_b32 s2, s2, s3
-; GFX6-NEXT:    s_and_b32 s3, s6, s16
-; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
-; GFX6-NEXT:    s_or_b32 s3, s3, s4
+; GFX6-NEXT:    s_lshl_b32 s3, s7, 16
+; GFX6-NEXT:    s_or_b32 s1, s2, s1
+; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
+; GFX6-NEXT:    s_or_b32 s2, s4, s2
+; GFX6-NEXT:    s_or_b32 s3, s6, s3
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_lshr_v8i16:
--
2.7.4
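
Note for reviewers: the trailing s_and_b32/v_and_b32 masks drop out of the
checks above because the common KnownBits shift helpers keep the known
leading/trailing bits of the shifted value even when the shift amount is
completely unknown, which the old constant-shift-only code could not do.
A minimal standalone sketch against llvm/Support/KnownBits.h (the main()
harness and the printed label are illustrative only, not part of this
patch):

  #include "llvm/Support/KnownBits.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  int main() {
    // LHS models a 32-bit value whose top 16 bits are known zero,
    // e.g. one that was already masked with 0xffff.
    KnownBits LHS(32);
    LHS.Zero.setHighBits(16);

    // RHS models a shift amount with no known bits at all.
    KnownBits RHS(32);

    // A logical shift right only moves zeros into the high bits, so even
    // for an unknown amount the result keeps at least 16 known leading
    // zeros; that is what lets GISelKnownBits prove the post-shift AND
    // masks in the lshr.ll tests redundant.
    KnownBits Res = KnownBits::lshr(LHS, RHS);
    outs() << "min leading zeros: " << Res.countMinLeadingZeros() << '\n';
    return 0;
  }

The G_SHL case is symmetric: known trailing zeros survive an unknown left
shift, which is why all three opcodes can simply defer to
KnownBits::ashr/lshr/shl.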