We already perform some basic folds (add/sub with zero, etc.) on scalar types; this patch adds basic support for constant splats as well in a few cases (we can add more with future test coverage).
In the cases I've enabled, we can handle buildvector implicit truncation, as we're not creating new constant nodes from the vector types — we're just returning existing nodes. This allows us to fold a number of extra cases in the AArch64 tests.
I haven't enabled support for undefs in buildvector splats: we're often checking for zero/all-ones patterns that return the original constant, and we shouldn't be returning undef elements in some of these cases. We can enable this later if we're OK with creating new constants.
Differential Revision: https://reviews.llvm.org/D118264
std::swap(N1, N2);
}
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ auto *N1C = dyn_cast<ConstantSDNode>(N1);
+ auto *N2C = dyn_cast<ConstantSDNode>(N2);
+
+ // Don't allow undefs in vector splats - we might be returning N2 when folding
+ // to zero etc.
+ ConstantSDNode *N2CV =
+ isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
switch (Opcode) {
default: break;
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
- if (N2C && N2C->isZero())
+ if (N2CV && N2CV->isZero())
return N2;
- if (N2C && N2C->isAllOnes()) // X & -1 -> X
+ if (N2CV && N2CV->isAllOnes()) // X & -1 -> X
return N1;
break;
case ISD::OR:
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
- if (N2C && N2C->isZero())
+ if (N2CV && N2CV->isZero())
return N1;
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
VT.getVectorElementType() == MVT::i1)
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
- if (N2C && N2C->isZero())
+ if (N2CV && N2CV->isZero())
return N1;
break;
case ISD::FP_ROUND:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0x00000000ff00ff
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: orr v0.2s, #0
; CHECK-NEXT: ret
%b = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> zeroinitializer
ret <8 x i8> %b
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0xffff00000000ffff
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: orr v0.2s, #0
; CHECK-NEXT: ret
%b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> zeroinitializer
ret <4 x i16> %b
; CHECK-NEXT: adrp x8, .LCPI85_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0]
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: orr v0.4s, #0
; CHECK-NEXT: ret
%b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> zeroinitializer
ret <4 x i32> %b
define void @function() {
; CHECK: cmp r0, #0
; CHECK: bxne lr
-; CHECK: vmov.i32 q8, #0xff0000
entry:
br i1 undef, label %vector.body, label %for.end
-; CHECK: vld1.32 {d18, d19}, [r0]
-; CHECK: vand q10, q9, q8
-; CHECK: vbic.i16 q9, #0xff
-; CHECK: vorr q9, q9, q10
-; CHECK: vst1.32 {d18, d19}, [r0]
+; CHECK: vld1.32 {d16, d17}, [r0]
+; CHECK: vbic.i32 q8, #0xff
+; CHECK: vorr q8, q8, q9
+; CHECK: vst1.32 {d16, d17}, [r0]
vector.body:
%wide.load = load <4 x i32>, <4 x i32>* undef, align 4
%0 = and <4 x i32> %wide.load, <i32 -16711936, i32 -16711936, i32 -16711936, i32 -16711936>
; AVX-LABEL: smulo_v4i1:
; AVX: # %bb.0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX-NEXT: vpslld $31, %xmm0, %xmm1
+; AVX-NEXT: vpsrad $31, %xmm1, %xmm0
+; AVX-NEXT: vmovmskps %xmm1, %eax
; AVX-NEXT: movb %al, (%rdi)
; AVX-NEXT: retq
;