With fix for AArch64 and Hexagon test cases.
int Index0, Index1;
SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+ // Extract element from splat_vector should be free.
+ // TODO: use DAG.isSplatValue instead?
+ bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
+ N1.getOpcode() == ISD::SPLAT_VECTOR;
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
- !TLI.isExtractVecEltCheap(VT, Index0) ||
+ !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
}
// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, ScalarBO);
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
return DAG.getBuildVector(VT, DL, Ops);
}
define <vscale x 4 x i1> @lane_mask_nxv4i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv4i1_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: index z0.s, #0, #1
-; CHECK-NEXT: mov z1.s, w0
; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: and w8, w1, #0xff
; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: mov z1.s, w1
; CHECK-NEXT: umin z0.s, z0.s, #255
-; CHECK-NEXT: and z1.s, z1.s, #0xff
; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i8(i8 %index, i8 %TC)
; CHECK-LABEL: lane_mask_nxv2i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x9, x1, #0xff
; CHECK-NEXT: and z0.d, z0.d, #0xff
-; CHECK-NEXT: and z1.d, z1.d, #0xff
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: mov z2.d, x1
+; CHECK-NEXT: mov z1.d, x9
; CHECK-NEXT: umin z0.d, z0.d, #255
-; CHECK-NEXT: and z2.d, z2.d, #0xff
; CHECK-NEXT: and z0.d, z0.d, #0xff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z0.d
+; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i8(i8 %index, i8 %TC)
ret <vscale x 2 x i1> %active.lane.mask
define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: fmov z2.s, #1.00000000
-; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s2, #1.00000000
+; CHECK-NEXT: fdiv s0, s2, s0
; CHECK-NEXT: mov z0.s, s0
-; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: fmul z0.s, z1.s, z0.s
; CHECK-NEXT: ret
entry:
define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: fmov z4.s, #1.00000000
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z0.s, s0
-; CHECK-NEXT: fdiv z4.s, p0/m, z4.s, z0.s
+; CHECK-NEXT: fmov s4, #1.00000000
+; CHECK-NEXT: fdiv s0, s4, s0
+; CHECK-NEXT: mov z4.s, s0
; CHECK-NEXT: fmul z0.s, z1.s, z4.s
; CHECK-NEXT: fmul z1.s, z2.s, z4.s
; CHECK-NEXT: fmul z2.s, z3.s, z4.s
define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fmov z3.d, #1.00000000
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z0.d, d0
-; CHECK-NEXT: fdiv z3.d, p0/m, z3.d, z0.d
+; CHECK-NEXT: fmov d3, #1.00000000
+; CHECK-NEXT: fdiv d0, d3, d0
+; CHECK-NEXT: mov z3.d, d0
; CHECK-NEXT: fmul z0.d, z1.d, z3.d
; CHECK-NEXT: fmul z1.d, z2.d, z3.d
; CHECK-NEXT: b foo_2_nxv2f64
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_256-LABEL: select_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.s, w9
-; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0
; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s
; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w8
-; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_256-LABEL: select_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.d, x9
-; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0
; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d
; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x8
-; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.s, w9
-; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0
; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s
; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w8
-; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.d, x9
-; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0
; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d
; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x8
-; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
; CHECK-LABEL: scalable_of_fixed_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, x0
-; CHECK-NEXT: add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT: add x8, x0, #1
+; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ret
%idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
; CHECK-LABEL: scalable_of_scalable_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, x0
-; CHECK-NEXT: incd z0.d, all, mul #8
+; CHECK-NEXT: addvl x8, x0, #1
+; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ret
%idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
; This code generates a concat_vectors with more than 2 inputs. Make sure
; that this compiles successfully.
-; CHECK: vlsr
+; CHECK: lsr
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"
;
; RV64-LABEL: vadd_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
;
; RV64-LABEL: vand_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
define <vscale x 1 x i8> @vmul_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
; CHECK-LABEL: vmul_vv_nxv1i8:
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
-; RV64-LABEL: vmul_xx_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmul.vx v8, v8, a1
-; RV64-NEXT: ret
+; RV64NOM-LABEL: vmul_xx_nxv8i64:
+; RV64NOM: # %bb.0:
+; RV64NOM-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64NOM-NEXT: vmv.v.x v8, a0
+; RV64NOM-NEXT: vmul.vx v8, v8, a1
+; RV64NOM-NEXT: ret
+;
+; RV64M-LABEL: vmul_xx_nxv8i64:
+; RV64M: # %bb.0:
+; RV64M-NEXT: mul a0, a0, a1
+; RV64M-NEXT: vsetvli a1, zero, e64, m8, ta, mu
+; RV64M-NEXT: vmv.v.x v8, a0
+; RV64M-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
%head2 = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
;
; RV64-LABEL: vor_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vor.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
;
; RV64-LABEL: vsub_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vsub.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
;
; RV64-LABEL: vxor_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vxor.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer