From 54945a12ecbb9575f57d147c48a158fc612c6ded Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 10 Dec 2016 17:00:00 +0000
Subject: [PATCH] [SelectionDAG] Add ability for computeKnownBits to peek
 through bitcasts from 'large element' scalar/vector to 'small element'
 vector.

Extension to D27129 which already supported bitcasts from 'small element'
vector to 'large element' scalar/vector types.

llvm-svn: 289329
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 24 +++++++++++++++++++++++-
 llvm/test/CodeGen/X86/known-bits-vector.ll     | 12 ++----------
 llvm/test/CodeGen/X86/pr21792.ll               |  6 +++---
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d80048c..45c9c59 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2186,7 +2186,29 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
       }
     }
 
-    // TODO - support ((SubBitWidth % BitWidth) == 0) when it becomes useful.
+    // Bitcast 'large element' scalar/vector to 'small element' vector.
+    if ((SubBitWidth % BitWidth) == 0) {
+      assert(Op.getValueType().isVector() && "Expected bitcast to vector");
+
+      // Collect known bits for the (smaller) output by collecting the known
+      // bits from the overlapping larger input elements and extracting the
+      // sub sections we actually care about.
+      unsigned SubScale = SubBitWidth / BitWidth;
+      APInt SubDemandedElts(NumElts / SubScale, 0);
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i])
+          SubDemandedElts.setBit(i / SubScale);
+
+      computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1);
+
+      KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i]) {
+          unsigned Offset = (i % SubScale) * BitWidth;
+          KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth);
+          KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth);
+        }
+    }
     break;
   }
   case ISD::AND:
diff --git a/llvm/test/CodeGen/X86/known-bits-vector.ll b/llvm/test/CodeGen/X86/known-bits-vector.ll
index 717c7c6..5f15fb2 100644
--- a/llvm/test/CodeGen/X86/known-bits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-bits-vector.ll
@@ -427,22 +427,14 @@ define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i3
 ; X32:       # BB#0:
 ; X32-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
-; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
 ; X64:       # BB#0:
 ; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
-; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
   %2 = bitcast <2 x i64> %1 to <4 x i32>
diff --git a/llvm/test/CodeGen/X86/pr21792.ll b/llvm/test/CodeGen/X86/pr21792.ll
index 635de99..54f9cb3 100644
--- a/llvm/test/CodeGen/X86/pr21792.ll
+++ b/llvm/test/CodeGen/X86/pr21792.ll
@@ -13,17 +13,17 @@ define void @func(<4 x float> %vx) {
 ; CHECK-NEXT:  .Lcfi0:
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    pextrq $1, %xmm0, %rcx
+; CHECK-NEXT:    pextrq $1, %xmm0, %rdx
+; CHECK-NEXT:    movq %rdx, %rcx
+; CHECK-NEXT:    shrq $32, %rcx
 ; CHECK-NEXT:    movd %xmm0, %rax
 ; CHECK-NEXT:    movq %rax, %r9
 ; CHECK-NEXT:    shrq $32, %r9
 ; CHECK-NEXT:    andl $2032, %eax # imm = 0x7F0
 ; CHECK-NEXT:    leaq stuff(%rax), %rdi
 ; CHECK-NEXT:    leaq stuff(%r9), %rsi
-; CHECK-NEXT:    movl %ecx, %edx
 ; CHECK-NEXT:    andl $2032, %edx # imm = 0x7F0
 ; CHECK-NEXT:    leaq stuff(%rdx), %rdx
-; CHECK-NEXT:    sarq $32, %rcx
 ; CHECK-NEXT:    leaq stuff(%rcx), %rcx
 ; CHECK-NEXT:    leaq stuff+8(%rax), %r8
 ; CHECK-NEXT:    leaq stuff+8(%r9), %r9
-- 
2.7.4
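For reference, below is a standalone sketch of the bit-redistribution idea the patch
implements: when a wide element is bitcast to several narrow elements, each narrow
element's known bits are the corresponding slice of the wide element's known bits,
obtained by shifting right by the element offset and truncating. The KnownMasks
struct, splitKnownBits() and the driver in main() are illustrative assumptions, not
LLVM's APInt/DemandedElts machinery; they only model the per-element arithmetic.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Known-zero / known-one masks for a single value (a stand-in for the two
// APInts that the SelectionDAG code threads around).
struct KnownMasks {
  uint64_t Zero;
  uint64_t One;
};

// Split the known bits of one wide element (SubBitWidth bits) into the known
// bits of its SubBitWidth / BitWidth narrow pieces, in little-endian element
// order, mirroring the lshr(Offset).trunc(BitWidth) step in the patch.
static std::vector<KnownMasks> splitKnownBits(KnownMasks Wide,
                                              unsigned SubBitWidth,
                                              unsigned BitWidth) {
  assert(SubBitWidth % BitWidth == 0 && "element widths must divide evenly");
  unsigned SubScale = SubBitWidth / BitWidth;
  uint64_t Mask = (BitWidth == 64) ? ~0ULL : ((1ULL << BitWidth) - 1);
  std::vector<KnownMasks> Parts;
  for (unsigned i = 0; i != SubScale; ++i) {
    unsigned Offset = i * BitWidth; // (i % SubScale) * BitWidth in the patch
    Parts.push_back({(Wide.Zero >> Offset) & Mask,
                     (Wide.One >> Offset) & Mask});
  }
  return Parts;
}

int main() {
  // lshr <2 x i64> by 1 makes bit 63 of each i64 known zero. Viewed through
  // a bitcast to <4 x i32>, that clears the sign bit of the odd i32 pieces.
  KnownMasks I64AfterLshr1 = {0x8000000000000000ULL, 0};
  std::vector<KnownMasks> Parts =
      splitKnownBits(I64AfterLshr1, /*SubBitWidth=*/64, /*BitWidth=*/32);
  for (unsigned i = 0; i != Parts.size(); ++i)
    std::printf("i32 piece %u: KnownZero = 0x%08llx\n", i,
                (unsigned long long)Parts[i].Zero);
  return 0;
}

With a v2i64 lshr-by-1 source, only the odd i32 pieces have their sign bit known
zero, and those are exactly the elements the [1,1,3,3] shuffle selects in
knownbits_lshr_bitcast_shuffle_uitofp, which is why the uitofp there can now be
lowered as a plain vcvtdq2ps.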