From 4b09d7a8ac1169dd3a3f13b126e0cdb2f3e2adb3 Mon Sep 17 00:00:00 2001 From: Zain Jaffal Date: Wed, 15 Mar 2023 20:47:53 +0000 Subject: [PATCH] [AArch64] Change GeneratePerfectShuffle to return one destination operand for zip and transpose operations. The tests added were crashing because the zip instruction was returning two destination operands. ZIP according to Arm returns only one destination operand. Reviewed By: dmgreen, fhahn Differential Revision: https://reviews.llvm.org/D146055 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 18 +++---- llvm/test/CodeGen/AArch64/arm64-zip.ll | 64 +++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f4f16a2..0d38577 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11052,23 +11052,17 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, DAG.getConstant(Imm, dl, MVT::i32)); } case OP_VUZPL: - return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, - OpRHS); + return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS); case OP_VUZPR: - return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, - OpRHS); + return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS); case OP_VZIPL: - return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, - OpRHS); + return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS); case OP_VZIPR: - return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, - OpRHS); + return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS); case OP_VTRNL: - return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, - OpRHS); + return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS); case OP_VTRNR: - return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, - OpRHS); + return DAG.getNode(AArch64ISD::TRN2, dl, VT, 
OpLHS, OpRHS); } } diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll index 02508c2..0584739 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zip.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll @@ -274,3 +274,67 @@ define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) { %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> ret <16 x i8> %3 } + +define <4 x float> @shuffle_zip1(<4 x float> %arg) { +; CHECK-LABEL: shuffle_zip1: +; CHECK: // %bb.0: // %bb +; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: fcmgt.4s v0, v0, v1 +; CHECK-NEXT: uzp1.8h v1, v0, v0 +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: xtn.4h v1, v1 +; CHECK-NEXT: zip2.4h v0, v0, v1 +; CHECK-NEXT: fmov.4s v1, #1.00000000 +; CHECK-NEXT: zip1.4h v0, v0, v0 +; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: and.16b v0, v1, v0 +; CHECK-NEXT: ret +bb: + %inst = fcmp olt <4 x float> zeroinitializer, %arg + %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> + %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> + %inst3 = select <4 x i1> %inst2, <4 x float> , <4 x float> zeroinitializer + ret <4 x float> %inst3 +} + +define <4 x i32> @shuffle_zip2(<4 x i32> %arg) { +; CHECK-LABEL: shuffle_zip2: +; CHECK: // %bb.0: // %bb +; CHECK-NEXT: cmtst.4s v0, v0, v0 +; CHECK-NEXT: uzp1.8h v1, v0, v0 +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: xtn.4h v1, v1 +; CHECK-NEXT: zip2.4h v0, v0, v1 +; CHECK-NEXT: movi.4s v1, #1 +; CHECK-NEXT: zip1.4h v0, v0, v0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: ret +bb: + %inst = icmp ult <4 x i32> zeroinitializer, %arg + %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> + %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> + %inst3 = select <4 x i1> %inst2, <4 x i32> , <4 x i32> zeroinitializer + ret <4 x i32> %inst3 +} + +define <4 x i32> @shuffle_zip3(<4 x i32> %arg) { +; CHECK-LABEL: 
shuffle_zip3: +; CHECK: // %bb.0: // %bb +; CHECK-NEXT: cmgt.4s v0, v0, #0 +; CHECK-NEXT: uzp1.8h v1, v0, v0 +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: xtn.4h v1, v1 +; CHECK-NEXT: zip2.4h v0, v0, v1 +; CHECK-NEXT: movi.4s v1, #1 +; CHECK-NEXT: zip1.4h v0, v0, v0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: ret +bb: + %inst = icmp slt <4 x i32> zeroinitializer, %arg + %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> + %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> + %inst3 = select <4 x i1> %inst2, <4 x i32> , <4 x i32> zeroinitializer + ret <4 x i32> %inst3 +} -- 2.7.4