if (Ty.getSizeInBits() == 64)
return &PPC::F8RCRegClass;
}
+ if (RB->getID() == PPC::VECRegBankID) {
+ if (Ty.getSizeInBits() == 128)
+ return &PPC::VSRCRegClass;
+ }
if (RB->getID() == PPC::CRRegBankID) {
if (Ty.getSizeInBits() == 1)
return &PPC::CRBITRCRegClass;
//===----------------------------------------------------------------------===//
#include "PPCLegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "ppc-legalinfo"
using namespace llvm;
using namespace LegalizeActions;
+using namespace LegalizeMutations;
+using namespace LegalityPredicates;
+
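+// Predicate for types that fit in a single PowerPC register: anything up to
+// 128 bits wide (bar a few odd scalar widths rejected by the size check),
+// where vectors must in addition have 8-, 16-, 32- or 64-bit elements.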
+static LegalityPredicate isRegisterType(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ unsigned TypeSize = QueryTy.getSizeInBits();
+
+ if (TypeSize % 32 == 1 || TypeSize > 128)
+ return false;
+
+ // Check if this is a legal PowerPC vector type.
+ if (QueryTy.isVector()) {
+ const int EltSize = QueryTy.getElementType().getSizeInBits();
+ return (EltSize == 8 || EltSize == 16 || EltSize == 32 || EltSize == 64);
+ }
+
+ return true;
+ };
+}
PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
using namespace TargetOpcode;
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
+ const LLT V16S8 = LLT::fixed_vector(16, 8);
+ const LLT V8S16 = LLT::fixed_vector(8, 16);
+ const LLT V4S32 = LLT::fixed_vector(4, 32);
+ const LLT V2S64 = LLT::fixed_vector(2, 64);
getActionDefinitionsBuilder(G_IMPLICIT_DEF).legalFor({S64});
getActionDefinitionsBuilder(G_CONSTANT)
.legalFor({S32, S64})
.legalForCartesianProduct({S64}, {S1, S8, S16, S32})
.clampScalar(0, S64, S64);
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
- .legalFor({S64})
- .clampScalar(0, S64, S64);
+ .legalFor({S64, V4S32})
+ .clampScalar(0, S64, S64)
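+ // Bitwise ops are only marked legal on s64 and v4s32; any other (128-bit)
+ // vector type is rewritten as a bitcast to v4s32 first.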
+ .bitcastIf(typeIsNot(0, V4S32), changeTo(0, V4S32));
getActionDefinitionsBuilder({G_ADD, G_SUB})
- .legalFor({S64})
+ .legalFor({S64, V16S8, V8S16, V4S32, V2S64})
.clampScalar(0, S64, S64);
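+ // A bitcast between two single-register types just relabels the bits in
+ // place; anything else falls back to the generic lowering.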
+ getActionDefinitionsBuilder(G_BITCAST)
+ .legalIf(all(isRegisterType(0), isRegisterType(1)))
+ .lower();
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
- .legalFor({S32, S64});
+ .legalFor({S32, S64, V4S32, V2S64});
getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({S1},
{S32, S64});
case PPC::VSSRCRegClassID:
case PPC::F4RCRegClassID:
return getRegBank(PPC::FPRRegBankID);
+ case PPC::VSRCRegClassID:
+ case PPC::VRRCRegClassID:
+ case PPC::VRRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+ case PPC::VSRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+ case PPC::SPILLTOVSRRCRegClassID:
+ case PPC::VSLRCRegClassID:
+ case PPC::VSLRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+ return getRegBank(PPC::VECRegBankID);
case PPC::CRRCRegClassID:
case PPC::CRBITRCRegClassID:
return getRegBank(PPC::CRRegBankID);
// Extension ops.
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
- case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_ANYEXT: {
assert(NumOperands <= 3 &&
"This code is for instructions with 3 or less operands");
- OperandsMapping = getValueMapping(PMI_GPR64);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
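+ // 128-bit values live in the vector bank; all narrower results stay in
+ // 64-bit GPRs.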
+ switch (Size) {
+ case 128:
+ OperandsMapping = getValueMapping(PMI_VEC128);
+ break;
+ default:
+ OperandsMapping = getValueMapping(PMI_GPR64);
+ break;
+ }
break;
+ }
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
Register SrcReg = MI.getOperand(1).getReg();
unsigned Size = getSizeInBits(SrcReg, MRI, TRI);
- assert((Size == 32 || Size == 64) && "Unsupported floating point types!\n");
- OperandsMapping = getValueMapping(Size == 32 ? PMI_FPR32 : PMI_FPR64);
+ assert((Size == 32 || Size == 64 || Size == 128) &&
+ "Unsupported floating point types!\n");
+ switch (Size) {
+ case 32:
+ OperandsMapping = getValueMapping(PMI_FPR32);
+ break;
+ case 64:
+ OperandsMapping = getValueMapping(PMI_FPR64);
+ break;
+ case 128:
+ OperandsMapping = getValueMapping(PMI_VEC128);
+ break;
+ }
break;
}
case TargetOpcode::G_FCMP: {
OperandsMapping = getOperandsMapping(OpdsMapping);
break;
}
+ case TargetOpcode::G_BITCAST: {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ bool DstIsGPR = !DstTy.isVector();
+ bool SrcIsGPR = !SrcTy.isVector();
+ // TODO: Currently, only the vector and GPR register banks are handled.
+ // This needs to be extended to handle the floating-point register
+ // banks in the future.
+ const RegisterBank &DstRB = DstIsGPR ? PPC::GPRRegBank : PPC::VECRegBank;
+ const RegisterBank &SrcRB = SrcIsGPR ? PPC::GPRRegBank : PPC::VECRegBank;
+
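+ // Model the bitcast as a copy of DstSize bits between the two banks.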
+ return getInstructionMapping(
+ MappingID, Cost, getCopyMapping(DstRB.getID(), SrcRB.getID(), DstSize),
+ NumOperands);
+ }
default:
return getInvalidInstructionMapping();
}
PMI_GPR64 = 2,
PMI_FPR32 = 3,
PMI_FPR64 = 4,
- PMI_CR = 5,
+ PMI_VEC128 = 5,
+ PMI_CR = 6,
PMI_Min = PMI_GPR32,
};
def GPRRegBank : RegisterBank<"GPR", [G8RC, G8RC_NOX0]>;
/// Floating point Registers
def FPRRegBank : RegisterBank<"FPR", [VSSRC]>;
+/// Vector Registers
+def VECRegBank : RegisterBank<"VEC", [VSRC]>;
/// Condition Registers
def CRRegBank : RegisterBank<"CR", [CRRC]>;
{0, 32, PPC::FPRRegBank},
// 3: FPR 64-bit value
{0, 64, PPC::FPRRegBank},
- // 4: CR 4-bit value
+ // 4: 128-bit vector (VSX, Altivec)
+ {0, 128, PPC::VECRegBank},
+ // 5: CR 4-bit value
{0, 4, PPC::CRRegBank},
};
{&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 13: CR 4-bit value.
+ // 13: 128-bit vector.
+ {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+ {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+ {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+ // 16: CR 4-bit value.
{&PPCGenRegisterBankInfo::PartMappings[PMI_CR - PMI_Min], 1},
};
return &ValMappings[1 + 3 * ValMappingIdx];
}
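+// Maps each register bank ID to the partial mapping of a full-width value in
+// that bank; banks without a copy mapping (CR) map to PMI_None.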
+PPCGenRegisterBankInfo::PartialMappingIdx
+ PPCGenRegisterBankInfo::BankIDToCopyMapIdx[]{
+ PMI_None,
+ PMI_FPR64, // FPR
+ PMI_GPR64, // GPR
+ PMI_VEC128, // VEC
+};
+
// TODO Too simple!
const RegisterBankInfo::ValueMapping *
PPCGenRegisterBankInfo::getCopyMapping(unsigned DstBankID, unsigned SrcBankID,
unsigned Size) {
assert(DstBankID < PPC::NumRegisterBanks && "Invalid bank ID");
assert(SrcBankID < PPC::NumRegisterBanks && "Invalid bank ID");
+ PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
+ PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
+ assert(DstRBIdx != PMI_None && "No such mapping");
+ assert(SrcRBIdx != PMI_None && "No such mapping");
+
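+ // A copy within a single bank is just that bank's plain value mapping.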
+ if (DstRBIdx == SrcRBIdx)
+ return getValueMapping(DstRBIdx);
- return &ValMappings[1];
+ assert(Size <= 128 &&
+ "Can currently handle types up to 128 bits (vectors)!");
+ // TODO: This function needs to be updated to handle all cases for
+ // GPRs, FPRs and vectors. It currently only handles copies within a
+ // single register bank correctly and has mainly been tested on
+ // bitcasts between different vector types.
+ unsigned ValMappingIdx = DstRBIdx - PMI_Min;
+
+ return &ValMappings[1 + 3 * ValMappingIdx];
}
} // namespace llvm
%div = fdiv float %a, %b
ret float %div
}
+
+define <4 x float> @test_fadd_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fadd_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvaddsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fadd <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_fadd_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fadd_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvadddp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fadd <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_fsub_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fsub_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsubsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fsub <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_fsub_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fsub_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsubdp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fsub <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_fmul_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fmul_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmulsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fmul <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_fmul_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fmul_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmuldp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fmul <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_fdiv_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fdiv_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvdivsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fdiv <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_fdiv_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fdiv_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvdivdp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fdiv <2 x double> %a, %b
+ ret <2 x double> %res
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -global-isel -o - < %s | FileCheck %s
+
+define <16 x i8> @test_add_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_add_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddubm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = add <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_add_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_add_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadduhm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = add <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_add_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_add_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadduwm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = add <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_add_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_add_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddudm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = add <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
+
+define <16 x i8> @test_sub_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_sub_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsububm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = sub <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_sub_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_sub_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubuhm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = sub <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_sub_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_sub_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubuwm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = sub <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_sub_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_sub_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubudm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = sub <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -global-isel -o - < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -mattr=-vsx -global-isel -o - < %s | \
+; RUN: FileCheck %s --check-prefix=NO-VSX
+
+define <16 x i8> @test_and_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_and_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxland v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_and_v16i8:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vand v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = and <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_or_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_or_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_or_v16i8:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = or <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_xor_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_xor_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlxor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_xor_v16i8:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vxor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = xor <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_and_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_and_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxland v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_and_v8i16:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vand v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = and <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_or_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_or_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_or_v8i16:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = or <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_xor_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xor_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlxor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_xor_v8i16:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vxor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = xor <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_and_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_and_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxland v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_and_v4i32:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vand v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = and <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_or_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_or_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_or_v4i32:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = or <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_xor_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xor_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlxor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_xor_v4i32:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vxor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = xor <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_and_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_and_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxland v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_and_v2i64:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vand v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = and <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_or_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_or_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_or_v2i64:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = or <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_xor_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_xor_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlxor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_xor_v2i64:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vxor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = xor <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -global-isel -o - < %s | FileCheck %s
+
+; Test returning vectors in functions
+define <16 x i8> @test_ret_v16i8(<16 x i8> %a){
+; CHECK-LABEL: test_ret_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <16 x i8> %a
+}
+
+define <8 x i16> @test_ret_v8i16(<8 x i16> %a){
+; CHECK-LABEL: test_ret_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <8 x i16> %a
+}
+
+define <4 x i32> @test_ret_v4i32(<4 x i32> %a){
+; CHECK-LABEL: test_ret_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <4 x i32> %a
+}
+
+define <2 x i64> @test_ret_v2i64(<2 x i64> %a){
+; CHECK-LABEL: test_ret_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <2 x i64> %a
+}
+
+define <4 x float> @test_ret_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_ret_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <4 x float> %a
+}
+
+define <2 x double> @test_ret_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_ret_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <2 x double> %a
+}
+
+; Test simple bitcasting of vectors
+define <16 x i8> @test_bitcast_v16i8_v16i8(<16 x i8> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v8i16(<8 x i16> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v4i32(<4 x i32> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v2i64(<2 x i64> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v8i16_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v8i16_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v4i32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v4i32_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v2i64_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v2i64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v4f32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v4f32_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v2f64_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v2f64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <2 x double>
+ ret <2 x double> %res
+}
+