[AArch64] Add target DAG combine for UUNPKHI/LO

author David Sherwood <david.sherwood@arm.com>

Thu, 4 Nov 2021 15:20:16 +0000 (15:20 +0000)

committer David Sherwood <david.sherwood@arm.com>

Fri, 5 Nov 2021 13:50:59 +0000 (13:50 +0000)
author David Sherwood <david.sherwood@arm.com>
Thu, 4 Nov 2021 15:20:16 +0000 (15:20 +0000)
committer David Sherwood <david.sherwood@arm.com>
Fri, 5 Nov 2021 13:50:59 +0000 (13:50 +0000)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

index 64afe16..fef430f 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15561,6 +15561,18 @@ static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
    return SDValue();
  }
  
+static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG) {
+  assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
+          N->getOpcode() == AArch64ISD::UUNPKLO) &&
+         "Unexpected Opcode!");
+
+  // Fold uunpklo/hi of an undef operand to undef of the node's result type.
+  if (N->getOperand(0).isUndef())
+    return DAG.getUNDEF(N->getValueType(0));
+
+  return SDValue(); // Operand 0 is not undef: return an empty SDValue.
+}
+
  static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
@@ -17227,6 +17239,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
      return performNVCASTCombine(N);
    case AArch64ISD::SPLICE:
      return performSpliceCombine(N, DAG);
+  case AArch64ISD::UUNPKLO:
+  case AArch64ISD::UUNPKHI:
+    return performUnpackCombine(N, DAG);
    case AArch64ISD::UZP1:
      return performUzpCombine(N, DAG);
    case AArch64ISD::SETCC_MERGE_ZERO:
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll

index 68e34dc..74f7c5e 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -404,8 +404,7 @@ declare <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.nxv2f16(<vs
  define <vscale x 3 x i32> @insert_nxv3i32_nxv2i32(<vscale x 2 x i32> %sv0) {
  ; CHECK-LABEL: insert_nxv3i32_nxv2i32:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpkhi z1.d, z0.s
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
  ; CHECK-NEXT:    ret
    %v0 = call <vscale x 3 x i32> @llvm.experimental.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32> undef, <vscale x 2 x i32> %sv0, i64 0)
   ret <vscale x 3 x i32> %v0
@@ -443,13 +442,9 @@ define <vscale x 6 x i32>  @insert_nxv6i32_nxv2i32(<vscale x 2 x i32> %sv0, <vsc
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
  ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    uunpklo z1.d, z0.s
-; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
  ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    st1w { z1.d }, p1, [sp, #2, mul vl]
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
  ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [sp, #1, mul vl]
  ; CHECK-NEXT:    addvl sp, sp, #2
  ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
author	David Sherwood <david.sherwood@arm.com>
	Thu, 4 Nov 2021 15:20:16 +0000 (15:20 +0000)
committer	David Sherwood <david.sherwood@arm.com>
	Fri, 5 Nov 2021 13:50:59 +0000 (13:50 +0000)
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/sve-insert-vector.ll		patch \| blob \| history