[DAGCombine] Fold Splat(bitcast(buildvector(x,..))) to splat(x)

author David Green <david.green@arm.com>

Mon, 12 Dec 2022 08:35:43 +0000 (08:35 +0000)

committer David Green <david.green@arm.com>

Mon, 12 Dec 2022 08:35:43 +0000 (08:35 +0000)
author David Green <david.green@arm.com>
Mon, 12 Dec 2022 08:35:43 +0000 (08:35 +0000)
committer David Green <david.green@arm.com>
Mon, 12 Dec 2022 08:35:43 +0000 (08:35 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 587405d..f9a7335 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23021,6 +23021,23 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
          if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
            if (Idx->getAPIntValue() == SplatIndex)
              return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+
+      // On little-endian targets, when splatting lane 0, look through a
+      // bitcast to the underlying scalar_to_vector or build_vector.
+      if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
+          SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
+          (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
+           N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
+        EVT N00VT = N0.getOperand(0).getValueType();
+        if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
+            VT.isInteger() && N00VT.isInteger()) {
+          EVT InVT =
+              TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
+          SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
+                                          SDLoc(N), InVT);
+          return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
+        }
+      }
      }
  
      // If this is a bit convert that changes the element type of the vector but
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll

index 2c3af5b..0947730 100644 (file)
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -508,8 +508,7 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
  define <8 x i16> @bitcast_i64_v8i16(i64 %a) {
  ; CHECK-LABEL: bitcast_i64_v8i16:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov d0, x0
-; CHECK-NEXT:    dup.8h v0, v0[0]
+; CHECK-NEXT:    dup.8h v0, w0
  ; CHECK-NEXT:    ret
    %b = bitcast i64 %a to <4 x i16>
    %r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll

index 0994036..fdbd6f8 100644 (file)
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -2538,12 +2538,11 @@ define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coer
  ; CHECK-LABEL: cmplx_mul_combined_re_im:
  ; CHECK:       // %bb.0: // %entry
  ; CHECK-NEXT:    lsr x8, x0, #16
+; CHECK-NEXT:    adrp x9, .LCPI196_0
  ; CHECK-NEXT:    fmov d4, x0
  ; CHECK-NEXT:    rev32 v5.8h, v0.8h
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    adrp x8, .LCPI196_0
-; CHECK-NEXT:    dup v1.8h, v1.h[0]
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI196_0]
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    ldr q3, [x9, :lo12:.LCPI196_0]
  ; CHECK-NEXT:    sqneg v2.8h, v1.8h
  ; CHECK-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v3.16b
  ; CHECK-NEXT:    sqdmull v2.4s, v0.4h, v4.h[0]
diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll

index 74944b3..9ba3866 100644 (file)
--- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -399,8 +399,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16(i64 %a) {
  define arm_aapcs_vfpcc <8 x i16> @bitcast_i128_v8i16(i128 %a) {
  ; CHECK-LE-LABEL: bitcast_i128_v8i16:
  ; CHECK-LE:       @ %bb.0:
-; CHECK-LE-NEXT:    vmov.32 q0[0], r0
-; CHECK-LE-NEXT:    vmov.u16 r0, q0[0]
  ; CHECK-LE-NEXT:    vdup.16 q0, r0
  ; CHECK-LE-NEXT:    bx lr
  ;
@@ -549,8 +547,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
  define arm_aapcs_vfpcc <8 x i16> @other_max_case(i32 %blockSize) {
  ; CHECK-LE-LABEL: other_max_case:
  ; CHECK-LE:       @ %bb.0:
-; CHECK-LE-NEXT:    vmov.32 q0[0], r0
-; CHECK-LE-NEXT:    vmov.u16 r0, q0[0]
  ; CHECK-LE-NEXT:    vdup.16 q0, r0
  ; CHECK-LE-NEXT:    bx lr
  ;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll

index 662a7d7..2aebae5 100644 (file)
--- a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
@@ -1,8 +1,7 @@
  ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
  
  ; Test that a splat shuffle of an fp-to-int bitcasted vector correctly
-; optimizes and lowers to a single splat instruction. Without a custom
-; DAG combine, this ends up doing both a splat and a shuffle.
+; optimizes and lowers to a single splat instruction.
  
  target triple = "wasm32-unknown-unknown"
  
@@ -19,8 +18,8 @@ define <4 x i32> @f32x4_splat(float %x) {
  
  ; CHECK-LABEL: not_a_vec:
  ; CHECK-NEXT: .functype not_a_vec (i64, i64) -> (v128){{$}}
-; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3
+; CHECK-NEXT: i32.wrap_i64    $push[[L:[0-9]+]]=, $0
+; CHECK-NEXT: i32x4.splat     $push[[R:[0-9]+]]=, $pop[[L]]
  ; CHECK-NEXT: return $pop[[R]]
  define <4 x i32> @not_a_vec(i128 %x) {
    %a = bitcast i128 %x to <4 x i32>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll

index d294757..422f522 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -74,8 +74,8 @@ define void @test2() nounwind {
  ; X64-LABEL: test2:
  ; X64:       ## %bb.0: ## %entry
  ; X64-NEXT:    movq _tmp_V2i@GOTPCREL(%rip), %rax
-; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
  ; X64-NEXT:    movq %xmm0, (%rax)
  ; X64-NEXT:    retq
  entry:
author	David Green <david.green@arm.com>
	Mon, 12 Dec 2022 08:35:43 +0000 (08:35 +0000)
committer	David Green <david.green@arm.com>
	Mon, 12 Dec 2022 08:35:43 +0000 (08:35 +0000)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
llvm/test/CodeGen/AArch64/arm64-dup.ll		patch | blob | history
llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll		patch | blob | history
llvm/test/CodeGen/Thumb2/mve-vdup.ll		patch | blob | history
llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll		patch | blob | history
llvm/test/CodeGen/X86/vector-shuffle-mmx.ll		patch | blob | history