From 3ebbe3536386da9fcf2edfab794a090c572b0d3c Mon Sep 17 00:00:00 2001
From: Cullen Rhodes
Date: Tue, 19 May 2020 12:55:53 +0000
Subject: [PATCH] [AArch64][SVE] Implement vector tuple intrinsics

Summary:
This patch adds the following intrinsics for creating two-tuple, three-tuple
and four-tuple scalable vectors:

 * llvm.aarch64.sve.tuple.create2
 * llvm.aarch64.sve.tuple.create3
 * llvm.aarch64.sve.tuple.create4

as well as the following intrinsics for extracting scalable vectors from, and
inserting scalable vectors into, vector tuples:

 * llvm.aarch64.sve.tuple.get
 * llvm.aarch64.sve.tuple.set

These intrinsics are intended to be used by the ACLE functions svcreate,
svget and svset.

This patch also includes calling convention support for passing and returning
tuples of scalable vectors to/from functions.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D75674
---
 llvm/include/llvm/IR/IntrinsicsAArch64.td          |  40 ++
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp    |  67 ++
 .../AArch64/sve-calling-convention-tuple-types.ll  | 499 +++++++++++++++
 .../CodeGen/AArch64/sve-intrinsics-create-tuple.ll | 706 +++++++++++++++++++++
 .../AArch64/sve-intrinsics-insert-extract-tuple.ll | 243 +++++++
 5 files changed, 1555 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 6065a8c..22a3c2e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -789,6 +789,31 @@ def llvm_nxv2f64_ty : LLVMType<nxv2f64>;

 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
+  class AdvSIMD_SVE_Create_2Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty],
+                [llvm_anyvector_ty, LLVMMatchType<1>],
+                [IntrReadMem]>;
+
+  class AdvSIMD_SVE_Create_3Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty],
+                [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
+                [IntrReadMem]>;
+
+  class AdvSIMD_SVE_Create_4Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty],
+                [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
+                 LLVMMatchType<1>],
+                [IntrReadMem]>;
+
+  class AdvSIMD_SVE_Set_Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
+                [IntrReadMem, ImmArg<ArgIndex<1>>]>;
+
+  class AdvSIMD_SVE_Get_Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
+                [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+
   class AdvSIMD_1Vec_PredLoad_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1301,6 +1326,21 @@ class SVE_MatMul_Intrinsic
                 [IntrNoMem]>;

 //
+// Vector tuple creation intrinsics (ACLE)
+//
+
+def int_aarch64_sve_tuple_create2 : AdvSIMD_SVE_Create_2Vector_Tuple;
+def int_aarch64_sve_tuple_create3 : AdvSIMD_SVE_Create_3Vector_Tuple;
+def int_aarch64_sve_tuple_create4 : AdvSIMD_SVE_Create_4Vector_Tuple;
+
+//
+// Vector tuple insertion/extraction intrinsics (ACLE)
+//
+
+def int_aarch64_sve_tuple_get : AdvSIMD_SVE_Get_Vector_Tuple;
+def int_aarch64_sve_tuple_set : AdvSIMD_SVE_Set_Vector_Tuple;
+
+//
 // Loads
 //

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index bbacf90..5488d3c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13657,6 +13657,73 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                         /*OnlyPackedOffsets=*/false);
   case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
     return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM);
+  case Intrinsic::aarch64_sve_tuple_get: {
+    SDLoc DL(N);
+    SDValue Chain = N->getOperand(0);
+    SDValue Src1 = N->getOperand(2);
+    SDValue Idx = N->getOperand(3);
+
+    uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
+    if (IdxConst > Src1->getNumOperands() - 1)
+      report_fatal_error("index larger than expected");
+
+    EVT ResVT = N->getValueType(0);
+    uint64_t NumLanes = ResVT.getVectorElementCount().Min;
+    SDValue Val =
+        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1,
+                    DAG.getConstant(IdxConst * NumLanes, DL, MVT::i32));
+    return DAG.getMergeValues({Val, Chain}, DL);
+  }
+  case Intrinsic::aarch64_sve_tuple_set: {
+    SDLoc DL(N);
+    SDValue Chain = N->getOperand(0);
+    SDValue Tuple = N->getOperand(2);
+    SDValue Idx = N->getOperand(3);
+    SDValue Vec = N->getOperand(4);
+
+    EVT TupleVT = Tuple.getValueType();
+    uint64_t TupleLanes = TupleVT.getVectorElementCount().Min;
+
+    uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
+    uint64_t NumLanes = Vec.getValueType().getVectorElementCount().Min;
+
+    if ((TupleLanes % NumLanes) != 0)
+      report_fatal_error("invalid tuple vector!");
+
+    uint64_t NumVecs = TupleLanes / NumLanes;
+
+    SmallVector<SDValue, 4> Opnds;
+    for (unsigned I = 0; I < NumVecs; ++I) {
+      if (I == IdxConst)
+        Opnds.push_back(Vec);
+      else {
+        Opnds.push_back(
+            DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Vec.getValueType(), Tuple,
+                        DAG.getConstant(I * NumLanes, DL, MVT::i32)));
+      }
+    }
+    SDValue Concat =
+        DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
+    return DAG.getMergeValues({Concat, Chain}, DL);
+  }
+  case Intrinsic::aarch64_sve_tuple_create2:
+  case Intrinsic::aarch64_sve_tuple_create3:
+  case
Intrinsic::aarch64_sve_tuple_create4: { + SDLoc DL(N); + SDValue Chain = N->getOperand(0); + + SmallVector Opnds; + for (unsigned I = 2; I < N->getNumOperands(); ++I) + Opnds.push_back(N->getOperand(I)); + + EVT VT = Opnds[0].getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VT.getVectorElementCount() * + (N->getNumOperands() - 2)); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds); + return DAG.getMergeValues({Concat, Chain}, DL); + } default: break; } diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll new file mode 100644 index 0000000..6c702b6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll @@ -0,0 +1,499 @@ +; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s + +; +; svint8x2_t +; + +define @ret_svint8x2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svint8x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z1, %z2) + ret %tuple +} + +define void @call_svint8x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svint8x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svint8x2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z1, %z3) + call void @callee_svint8x2_t( %tuple) + ret void +} + +; +; svint16x2_t +; + +define @ret_svint16x2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svint16x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z1, %z2) + ret %tuple +} + +define void @call_svint16x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svint16x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svint16x2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z1, %z3) + call void @callee_svint16x2_t( %tuple) + ret void +} + +; +; svint32x2_t +; + +define @ret_svint32x2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svint32x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z1, %z2) + ret %tuple +} + +define void @call_svint32x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svint32x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svint32x2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z1, %z3) + call void @callee_svint32x2_t( %tuple) + ret void +} + +; +; svint64x2_t +; + +define @ret_svint64x2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svint64x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z1, %z2) + ret %tuple +} + +define void @call_svint64x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svint64x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svint64x2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z1, %z3) + call void @callee_svint64x2_t( %tuple) + ret void +} + +; +; svfloatx2_t +; + +define @ret_svfloatx2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svfloatx2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call 
@llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z1, %z2) + ret %tuple +} + +define void @call_svfloatx2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svfloatx2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svfloatx2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z1, %z3) + call void @callee_svfloatx2_t( %tuple) + ret void +} + +; +; svdoublex2_t +; + +define @ret_svdoublex2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svdoublex2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z1, %z2) + ret %tuple +} + +define void @call_svdoublex2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svdoublex2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svdoublex2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z1, %z3) + call void @callee_svdoublex2_t( %tuple) + ret void +} + +; +; svint8x3_t +; + +define @ret_svint8x3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svint8x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svint8x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svint8x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint8x3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z1, %z2, %z4) + call void @callee_svint8x3_t( %tuple) + ret void +} + +; +; svint16x3_t +; + +define @ret_svint16x3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svint16x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svint16x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svint16x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint16x3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z1, %z2, %z4) + call void @callee_svint16x3_t( %tuple) + ret void +} + +; +; svint32x3_t +; + +define @ret_svint32x3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svint32x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svint32x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svint32x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint32x3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z1, %z2, %z4) + call void @callee_svint32x3_t( %tuple) + ret void +} + +; +; svint64x3_t +; + +define @ret_svint64x3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svint64x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svint64x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svint64x3_t +; CHECK: mov z0.d, z1.d +; 
CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint64x3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z1, %z2, %z4) + call void @callee_svint64x3_t( %tuple) + ret void +} + +; +; svfloatx3_t +; + +define @ret_svfloatx3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svfloatx3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svfloatx3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svfloatx3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svfloatx3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z1, %z2, %z4) + call void @callee_svfloatx3_t( %tuple) + ret void +} + +; +; svdoublex3_t +; + +define @ret_svdoublex3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svdoublex3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svdoublex3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svdoublex3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svdoublex3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z1, %z2, %z4) + call void @callee_svdoublex3_t( %tuple) + ret void +} + +; +; svint8x4_t +; + +define @ret_svint8x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svint8x4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svint8x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svint8x4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint8x4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z1, %z2, %z4, %z5) + call void @callee_svint8x4_t( %tuple) + ret void +} + +; +; svint16x4_t +; + +define @ret_svint16x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svint16x4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svint16x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svint16x4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint16x4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z1, %z2, %z4, %z5) + call void @callee_svint16x4_t( %tuple) + ret void +} + +; +; svint32x4_t +; + +define @ret_svint32x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svint32x4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void 
@call_svint32x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svint32x4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint32x4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z1, %z2, %z4, %z5) + call void @callee_svint32x4_t( %tuple) + ret void +} + +; +; svint64x4_t +; + +define @ret_svint64x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svint64x4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svint64x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svint64x4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint64x4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z1, %z2, %z4, %z5) + call void @callee_svint64x4_t( %tuple) + ret void +} + +; +; svfloatx4_t +; + +define @ret_svfloatx4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svfloatx4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svfloatx4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svfloatx4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svfloatx4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z1, %z2, %z4, %z5) + call void @callee_svfloatx4_t( %tuple) + ret void +} + +; +; svdoublex4_t +; + +define @ret_svdoublex4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svdoublex4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svdoublex4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svdoublex4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svdoublex4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z1, %z2, %z4, %z5) + call void @callee_svdoublex4_t( %tuple) + ret void +} + +attributes #0 = { nounwind "target-features"="+sve" } + +declare void @callee_svint8x2_t() +declare void @callee_svint16x2_t() +declare void @callee_svint32x2_t() +declare void @callee_svint64x2_t() +declare void @callee_svfloatx2_t() +declare void @callee_svdoublex2_t() + +declare void @callee_svint8x3_t() +declare void @callee_svint16x3_t() +declare void @callee_svint32x3_t() +declare void @callee_svint64x3_t() +declare void @callee_svfloatx3_t() +declare void @callee_svdoublex3_t() + +declare void @callee_svint8x4_t() +declare void @callee_svint16x4_t() +declare void @callee_svint32x4_t() +declare void @callee_svint64x4_t() +declare void @callee_svfloatx4_t() +declare void @callee_svdoublex4_t() + + +; x2 +declare @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(, ) +declare 
@llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(, ) + +; x3 +declare @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(, , ) + +; x4 +declare @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(, , , ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll new file mode 100644 index 0000000..38b05e4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll @@ -0,0 +1,706 @@ +; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=1 < %s | FileCheck %s + +; +; SVCREATE2 (i8) +; + +define @test_svcreate2_s8_vec0(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s8_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_s8_vec1(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s8_vec1: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (i16) +; + +define @test_svcreate2_s16_vec0(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s16_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_s16_vec1(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s16_vec1: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (half) +; + +define @test_svcreate2_f16_vec0(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f16_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %tuple, i32 0) + ret %extract +} + +define 
@test_svcreate2_f16_vec1(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f16_vec1: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (i32) +; + +define @test_svcreate2_s32_vec0(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s32_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_s32_vec1(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s32_vec1: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (float) +; + +define @test_svcreate2_f32_vec0(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f32_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_f32_vec1(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f32_vec1: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (i64) +; + +define @test_svcreate2_s64_vec0(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s64_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_s64_vec1(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s64_vec1: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (double) +; + +define @test_svcreate2_f64_vec0(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f64_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_f64_vec1(i1 %p, %z0, %z1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f64_vec1: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call 
@llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z0, %z1) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE3 (i8) +; + +define @test_svcreate3_s8_vec0(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s8_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_s8_vec2(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s8_vec2: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE3 (i16) +; + +define @test_svcreate3_s16_vec0(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s16_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_s16_vec2(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s16_vec2: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %tuple, i32 2) + ret %extract +} +; +; SVCREATE3 (half) +; + +define @test_svcreate3_f16_vec0(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f16_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_f16_vec2(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f16_vec2: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %tuple, i32 2) + ret %extract +} + + +; +; SVCREATE3 (i32) +; + +define @test_svcreate3_s32_vec0(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s32_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_s32_vec2(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s32_vec2: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = 
tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE3 (float) +; + +define @test_svcreate3_f32_vec0(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f32_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_f32_vec2(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f32_vec2: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE3 (i64) +; + +define @test_svcreate3_s64_vec0(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s64_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_s64_vec2(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s64_vec2: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE3 (double) +; + +define @test_svcreate3_f64_vec0(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f64_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_f64_vec2(i1 %p, %z0, %z1, %z2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f64_vec2: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z0, %z1, %z2) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE4 (i8) +; + +define @test_svcreate4_s8_vec0(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s8_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_s8_vec3(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s8_vec3: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 
(i16) +; + +define @test_svcreate4_s16_vec0(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s16_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_s16_vec3(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s16_vec3: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (half) +; + +define @test_svcreate4_f16_vec0(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f16_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_f16_vec3(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f16_vec3: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (i32) +; + +define @test_svcreate4_s32_vec0(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s32_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_s32_vec3(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s32_vec3: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (float) +; + +define @test_svcreate4_f32_vec0(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f32_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_f32_vec3(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f32_vec3: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (i64) +; + +define 
@test_svcreate4_s64_vec0(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s64_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_s64_vec3(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s64_vec3: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (double) +; + +define @test_svcreate4_f64_vec0(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f64_vec0: +; CHECK: // %L2 +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_f64_vec3(i1 %p, %z0, %z1, %z2, %z3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f64_vec3: +; CHECK: // %L2 +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z0, %z1, %z2, %z3) + br i1 %p, label %L1, label %L2 +L1: + ret undef +L2: + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %tuple, i32 3) + ret %extract +} + +attributes #0 = { nounwind "target-features"="+sve" } + +declare @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(, ) + +declare @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(, , ) + +declare @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64 (, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(, , , ) + +declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(, i32 immarg) + +declare 
@llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(, i32 immarg) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll new file mode 100644 index 0000000..287f724 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll @@ -0,0 +1,243 @@ +; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s + +; All these tests create a vector tuple, insert z5 into one of the elements, +; and finally extracts that element from the wide vector to return it. These +; checks ensure that z5 is always the value that is returned. + +; +; Insert into two element tuples +; + +; tuple: { tuple2.res0, tuple2.res1 } +; insert z5: { z5 , tuple2.res1 } +; extract z5: ^^ +define @set_tuple2_nxv8i32_elt0( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple2_nxv8i32_elt0: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) + %ins = call @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32( %tuple, i32 0, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %ins, i32 0) + ret %ext +} + +; tuple: { tuple2.res0, tuple2.res1 } +; insert z5: { tuple2.res0, z5 } +; extract z5: ^^ +define @set_tuple2_nxv8i32_elt1( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple2_nxv8i32_elt1: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) + %ins = call @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32( %tuple, i32 1, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %ins, i32 1) + ret %ext +} + +; This test checks the elements _not_ being set aren't changed. 
+ +; tuple: { tuple2.res0, tuple2.res1 } +; insert z5: { tuple2.res0, z5 } +; extract z0: ^^ +define @set_tuple2_nxv8i32_elt1_ret_elt0( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0: + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) + %ins = call @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32( %tuple, i32 1, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %ins, i32 0) + ret %ext +} + +; Test extract of tuple passed into function +define @get_tuple2_nxv8i32_elt1( %tuple) #0 { + ; CHECK-LABEL: get_tuple2_nxv8i32_elt1: + ; CHECK-NEXT: mov z0.d, z1.d + ; CHECK-NEXT: ret + %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %tuple, i32 1) + ret %ext +} + +; +; Insert into three element tuples +; + +; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } +; insert z5: { z5 , tuple3.res0, tuple3.res2 } +; extract z5: ^^ +define @set_tuple3_nxv12i32_elt0( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple3_nxv12i32_elt0: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) + %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 0, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 0) + ret %ext +} + +; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } +; insert z5: { tuple3.res0, z5 , tuple3.res2 } +; extract z5: ^^ +define @set_tuple3_nxv12i32_elt1( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple3_nxv12i32_elt1: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) + %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 1, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 1) + ret %ext +} + +; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } +; insert z5: { tuple3.res0, tuple3.res1, z5 } +; extract z5: ^^ +define @set_tuple3_nxv12i32_elt2( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple3_nxv12i32_elt2: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) + %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 2, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 2) + ret %ext +} + +; This test checks the elements _not_ being set aren't changed. 
+ +; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } +; insert z5: { tuple3.res0, z5 , tuple3.res2 } +; extract z2: ^^ +define @set_tuple3_nxv12i32_elt1_ret_elt2( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2: + ; CHECK-NEXT: mov z0.d, z2.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) + %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 1, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 2) + ret %ext +} + +; Test extract of tuple passed into function +define @get_tuple3_nxv12i32_elt2( %z0, %tuple) #0 { + ; CHECK-LABEL: get_tuple3_nxv12i32_elt2: + ; CHECK-NEXT: mov z0.d, z3.d + ; CHECK-NEXT: ret + %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %tuple, i32 2) + ret %ext +} + +; +; Insert into four element tuples +; + +; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } +; insert z5: { z5 , tuple4.res1, tuple4.res2, tuple4.res3 } +; extract z5: ^^ +define @set_tuple4_nxv16i32_elt0( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple4_nxv16i32_elt0: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 0, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 0) + ret %ext +} + +; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } +; insert z5: { tuple4.res0, z5 , tuple4.res2, tuple4.res3 } +; extract z5: ^^ +define @set_tuple4_nxv16i32_elt1( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple4_nxv16i32_elt1: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 1, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 1) + ret %ext +} + +; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } +; insert z5: { tuple4.res0, tuple4.res1, z5 , tuple4.res3 } +; extract z5: ^^ +define @set_tuple4_nxv16i32_elt2( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple4_nxv16i32_elt2: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 2, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 2) + ret %ext +} + +; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } +; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 } +; extract z5: ^^ +define @set_tuple4_nxv16i32_elt3( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple4_nxv16i32_elt3: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 3, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 3) + ret %ext +} + +; This test checks the elements _not_ being set aren't changed. 
+ +; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } +; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 } +; extract z2: ^^ +define @set_tuple4_nxv16i32_elt3_ret_elt2( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2: + ; CHECK-NEXT: mov z0.d, z2.d + ; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 3, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 2) + ret %ext +} + +; Test extract of tuple passed into function +define @get_tuple4_nxv16i32_elt3( %tuple) #0 { + ; CHECK-LABEL: get_tuple4_nxv16i32_elt3: + ; CHECK-NEXT: mov z0.d, z3.d + ; CHECK-NEXT: ret + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %tuple, i32 3) + ret %ext +} + +attributes #0 = { nounwind "target-features"="+sve" } + +declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) +declare @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(, i32, ) +declare @llvm.aarch64.sve.tuple.get.nxv8i32(, i32) + +declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) +declare @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(, i32, ) +declare @llvm.aarch64.sve.tuple.get.nxv12i32(, i32) + +declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) +declare @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(, i32, ) +declare @llvm.aarch64.sve.tuple.get.nxv16i32(, i32) -- 2.7.4
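
For reference, below is a minimal LLVM IR usage sketch of the new intrinsics,
not part of the patch itself. It follows the nxv4i32/nxv8i32 declarations used
by the tests above; the function name @tuple_roundtrip_example is illustrative
only. It packs two vectors into a two-vector tuple (svcreate2), overwrites
element 1 (svset2), and reads it back (svget2), so the function returns %c.

define <vscale x 4 x i32> @tuple_roundtrip_example(<vscale x 4 x i32> %a,
                                                   <vscale x 4 x i32> %b,
                                                   <vscale x 4 x i32> %c) #0 {
  ; Create a two-vector tuple from %a and %b.
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ; Replace element 1 of the tuple with %c.
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %c)
  ; Extract element 1 again; this yields %c.
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32>, i32)

attributes #0 = { nounwind "target-features"="+sve" }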