From 3508f123353c0a145ee79cebb972f46fcb97bf1e Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1@ibm.com>
Date: Sat, 25 Mar 2023 23:04:53 -0500
Subject: [PATCH] [PowerPC][GISel] Add initial GlobalISel support for vector
 functions.

This patch adds the initial support for vector functions and register banks
within GlobalISel. With this patch, we are able to support simple functions that
return vectors, and also functions that perform simple operations.

This patch also:
- Legalizes vector types for G_AND, G_OR, G_XOR, G_ADD, G_SUB, G_BITCAST, G_FADD, G_FSUB
- Introduce initial support for bitcasting (that will need to be extended upon)
- Add various different test cases to for test vector support within GlobalISel

Differential Revision: https://reviews.llvm.org/D137785
---
 .../PowerPC/GISel/PPCInstructionSelector.cpp       |   4 +
 llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp |  37 ++-
 .../Target/PowerPC/GISel/PPCRegisterBankInfo.cpp   |  54 +++-
 .../lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h |   3 +-
 llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td  |   2 +
 llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def |  34 ++-
 .../CodeGen/PowerPC/GlobalISel/float-arithmetic.ll |  72 +++++
 .../PowerPC/GlobalISel/ppc-isel-arithmentic-vec.ll |  75 +++++
 .../PowerPC/GlobalISel/ppc-isel-logical-vec.ll     | 174 +++++++++++
 .../GlobalISel/ppc-isel-ret-and-bitcast-vec.ll     | 336 +++++++++++++++++++++
 10 files changed, 779 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-arithmentic-vec.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-logical-vec.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-ret-and-bitcast-vec.ll

diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
index 97f3e09..24f02a3 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -113,6 +113,10 @@ static const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank *RB) {
     if (Ty.getSizeInBits() == 64)
       return &PPC::F8RCRegClass;
   }
+  if (RB->getID() == PPC::VECRegBankID) {
+    if (Ty.getSizeInBits() == 128)
+      return &PPC::VSRCRegClass;
+  }
   if (RB->getID() == PPC::CRRegBankID) {
     if (Ty.getSizeInBits() == 1)
       return &PPC::CRBITRCRegClass;
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
index 2e0d12c..6b24c2a 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
@@ -10,12 +10,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCLegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "ppc-legalinfo"
 
 using namespace llvm;
 using namespace LegalizeActions;
+using namespace LegalizeMutations;
+using namespace LegalityPredicates;
+
+static LegalityPredicate isRegisterType(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT QueryTy = Query.Types[TypeIdx];
+    unsigned TypeSize = QueryTy.getSizeInBits();
+
+    if (TypeSize % 32 == 1 || TypeSize > 128)
+      return false;
+
+    // Check if this is a legal PowerPC vector type.
+    if (QueryTy.isVector()) {
+      const int EltSize = QueryTy.getElementType().getSizeInBits();
+      return (EltSize == 8 || EltSize == 16 || EltSize == 32 || EltSize == 64);
+    }
+
+    return true;
+  };
+}
 
 PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
   using namespace TargetOpcode;
@@ -25,6 +46,10 @@ PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
   const LLT S16 = LLT::scalar(16);
   const LLT S32 = LLT::scalar(32);
   const LLT S64 = LLT::scalar(64);
+  const LLT V16S8 = LLT::fixed_vector(16, 8);
+  const LLT V8S16 = LLT::fixed_vector(8, 16);
+  const LLT V4S32 = LLT::fixed_vector(4, 32);
+  const LLT V2S64 = LLT::fixed_vector(2, 64);
   getActionDefinitionsBuilder(G_IMPLICIT_DEF).legalFor({S64});
   getActionDefinitionsBuilder(G_CONSTANT)
       .legalFor({S32, S64})
@@ -33,14 +58,18 @@ PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
       .legalForCartesianProduct({S64}, {S1, S8, S16, S32})
       .clampScalar(0, S64, S64);
   getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
-      .legalFor({S64})
-      .clampScalar(0, S64, S64);
+      .legalFor({S64, V4S32})
+      .clampScalar(0, S64, S64)
+      .bitcastIf(typeIsNot(0, V4S32), changeTo(0, V4S32));
   getActionDefinitionsBuilder({G_ADD, G_SUB})
-      .legalFor({S64})
+      .legalFor({S64, V16S8, V8S16, V4S32, V2S64})
       .clampScalar(0, S64, S64);
+  getActionDefinitionsBuilder(G_BITCAST)
+      .legalIf(all(isRegisterType(0), isRegisterType(1)))
+      .lower();
 
   getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
-      .legalFor({S32, S64});
+      .legalFor({S32, S64, V4S32, V2S64});
 
   getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({S1},
                                                                {S32, S64});
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
index 0ae44ec..25587b3 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -48,6 +48,14 @@ PPCRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   case PPC::VSSRCRegClassID:
   case PPC::F4RCRegClassID:
     return getRegBank(PPC::FPRRegBankID);
+  case PPC::VSRCRegClassID:
+  case PPC::VRRCRegClassID:
+  case PPC::VRRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+  case PPC::VSRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+  case PPC::SPILLTOVSRRCRegClassID:
+  case PPC::VSLRCRegClassID:
+  case PPC::VSLRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+    return getRegBank(PPC::VECRegBankID);
   case PPC::CRRCRegClassID:
   case PPC::CRBITRCRegClassID:
     return getRegBank(PPC::CRRegBankID);
@@ -90,11 +98,21 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // Extension ops.
   case TargetOpcode::G_SEXT:
   case TargetOpcode::G_ZEXT:
-  case TargetOpcode::G_ANYEXT:
+  case TargetOpcode::G_ANYEXT: {
     assert(NumOperands <= 3 &&
            "This code is for instructions with 3 or less operands");
-    OperandsMapping = getValueMapping(PMI_GPR64);
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+    unsigned Size = Ty.getSizeInBits();
+    switch (Size) {
+    case 128:
+      OperandsMapping = getValueMapping(PMI_VEC128);
+      break;
+    default:
+      OperandsMapping = getValueMapping(PMI_GPR64);
+      break;
+    }
     break;
+  }
   case TargetOpcode::G_FADD:
   case TargetOpcode::G_FSUB:
   case TargetOpcode::G_FMUL:
@@ -102,8 +120,19 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     Register SrcReg = MI.getOperand(1).getReg();
     unsigned Size = getSizeInBits(SrcReg, MRI, TRI);
 
-    assert((Size == 32 || Size == 64) && "Unsupported floating point types!\n");
-    OperandsMapping = getValueMapping(Size == 32 ? PMI_FPR32 : PMI_FPR64);
+    assert((Size == 32 || Size == 64 || Size == 128) &&
+           "Unsupported floating point types!\n");
+    switch (Size) {
+    case 32:
+      OperandsMapping = getValueMapping(PMI_FPR32);
+      break;
+    case 64:
+      OperandsMapping = getValueMapping(PMI_FPR64);
+      break;
+    case 128:
+      OperandsMapping = getValueMapping(PMI_VEC128);
+      break;
+    }
     break;
   }
   case TargetOpcode::G_FCMP: {
@@ -185,6 +214,23 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     OperandsMapping = getOperandsMapping(OpdsMapping);
     break;
   }
+  case TargetOpcode::G_BITCAST: {
+    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+    unsigned DstSize = DstTy.getSizeInBits();
+
+    bool DstIsGPR = !DstTy.isVector();
+    bool SrcIsGPR = !SrcTy.isVector();
+    // TODO: Currently, only vector and GPR register banks are handled.
+    //       This needs to be extended to handle floating point register
+    //       banks in the future.
+    const RegisterBank &DstRB = DstIsGPR ? PPC::GPRRegBank : PPC::VECRegBank;
+    const RegisterBank &SrcRB = SrcIsGPR ? PPC::GPRRegBank : PPC::VECRegBank;
+
+    return getInstructionMapping(
+        MappingID, Cost, getCopyMapping(DstRB.getID(), SrcRB.getID(), DstSize),
+        NumOperands);
+  }
   default:
     return getInvalidInstructionMapping();
   }
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
index 885bdcb..c2a16c9 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
@@ -32,7 +32,8 @@ protected:
     PMI_GPR64 = 2,
     PMI_FPR32 = 3,
     PMI_FPR64 = 4,
-    PMI_CR = 5,
+    PMI_VEC128 = 5,
+    PMI_CR = 6,
     PMI_Min = PMI_GPR32,
   };
 
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
index 16f3bd8..f2237d8 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
@@ -15,5 +15,7 @@
 def GPRRegBank : RegisterBank<"GPR", [G8RC, G8RC_NOX0]>;
 /// Floating point Registers
 def FPRRegBank : RegisterBank<"FPR", [VSSRC]>;
+/// Vector Registers
+def VECRegBank : RegisterBank<"VEC", [VSRC]>;
 /// Condition Registers
 def CRRegBank : RegisterBank<"CR", [CRRC]>;
diff --git a/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def b/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
index f7e79ae..eff4432 100644
--- a/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
+++ b/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
@@ -22,7 +22,9 @@ RegisterBankInfo::PartialMapping PPCGenRegisterBankInfo::PartMappings[]{
     {0, 32, PPC::FPRRegBank},
     // 3: FPR 64-bit value
     {0, 64, PPC::FPRRegBank},
-    // 4: CR 4-bit value
+    // 4: 128-bit vector (VSX, Altivec)
+    {0, 128, PPC::VECRegBank},
+    // 5: CR 4-bit value
     {0, 4, PPC::CRRegBank},
 };
 
@@ -57,7 +59,11 @@ RegisterBankInfo::ValueMapping PPCGenRegisterBankInfo::ValMappings[]{
     {&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
     {&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
     {&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
-    // 13: CR 4-bit value.
+    // 13: 128-bit vector.
+    {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+    {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+    {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+    // 16: CR 4-bit value.
     {&PPCGenRegisterBankInfo::PartMappings[PMI_CR - PMI_Min], 1},
 };
 
@@ -71,14 +77,36 @@ PPCGenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx) {
   return &ValMappings[1 + 3 * ValMappingIdx];
 }
 
+PPCGenRegisterBankInfo::PartialMappingIdx
+  PPCGenRegisterBankInfo::BankIDToCopyMapIdx[]{
+    PMI_None,
+    PMI_FPR64,  // FPR
+    PMI_GPR64,  // GPR
+    PMI_VEC128, // VEC
+};
+
 // TODO Too simple!
 const RegisterBankInfo::ValueMapping *
 PPCGenRegisterBankInfo::getCopyMapping(unsigned DstBankID, unsigned SrcBankID,
                                        unsigned Size) {
   assert(DstBankID < PPC::NumRegisterBanks && "Invalid bank ID");
   assert(SrcBankID < PPC::NumRegisterBanks && "Invalid bank ID");
+  PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
+  PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
+  assert(DstRBIdx != PMI_None && "No such mapping");
+  assert(SrcRBIdx != PMI_None && "No such mapping");
+
+  if (DstRBIdx == SrcRBIdx)
+    return getValueMapping(DstRBIdx);
 
-  return &ValMappings[1];
+  assert(Size <= 128 && "Can currently handle types up to 128 bits (vectors)!");
+  // TODO: This function needs to be updated to handle all cases for
+  //       GPRs, FPRs and vectors. It currently only handles bitcasting to
+  //       the same type and has only mainly been tested for bitcasting
+  //       between different vector types.
+  unsigned ValMappingIdx = DstRBIdx - PMI_Min;
+
+  return &ValMappings[1 + 3 * ValMappingIdx];
 }
 
 } // namespace llvm
diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/float-arithmetic.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/float-arithmetic.ll
index 185fa4d..1ebde0e 100644
--- a/llvm/test/CodeGen/PowerPC/GlobalISel/float-arithmetic.ll
+++ b/llvm/test/CodeGen/PowerPC/GlobalISel/float-arithmetic.ll
@@ -51,3 +51,75 @@ entry:
   %div = fdiv float %a, %b
   ret float %div
 }
+
+define <4 x float> @test_fadd_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fadd_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvaddsp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = fadd <4 x float> %a, %b
+  ret <4 x float> %res
+}
+
+define <2 x double> @test_fadd_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fadd_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvadddp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = fadd <2 x double> %a, %b
+  ret <2 x double> %res
+}
+
+define <4 x float> @test_fsub_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fsub_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvsubsp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = fsub <4 x float> %a, %b
+  ret <4 x float> %res
+}
+
+define <2 x double> @test_fsub_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fsub_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvsubdp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = fsub <2 x double> %a, %b
+  ret <2 x double> %res
+}
+
+define <4 x float> @test_fmul_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fmul_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmulsp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = fmul <4 x float> %a, %b
+  ret <4 x float> %res
+}
+
+define <2 x double> @test_fmul_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fmul_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvmuldp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = fmul <2 x double> %a, %b
+  ret <2 x double> %res
+}
+
+define <4 x float> @test_fdiv_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fdiv_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvdivsp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = fdiv <4 x float> %a, %b
+  ret <4 x float> %res
+}
+
+define <2 x double> @test_fdiv_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fdiv_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvdivdp v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = fdiv <2 x double> %a, %b
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-arithmentic-vec.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-arithmentic-vec.ll
new file mode 100644
index 0000000..e38de1b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-arithmentic-vec.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -global-isel -o - < %s | FileCheck %s
+
+define <16 x i8> @test_add_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_add_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vaddubm v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = add <16 x i8> %a, %b
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @test_add_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_add_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vadduhm v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = add <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_add_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_add_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vadduwm v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = add <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @test_add_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_add_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vaddudm v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = add <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
+
+define <16 x i8> @test_sub_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_sub_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsububm v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = sub <16 x i8> %a, %b
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @test_sub_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_sub_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsubuhm v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = sub <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_sub_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_sub_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsubuwm v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = sub <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @test_sub_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_sub_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsubudm v2, v2, v3
+; CHECK-NEXT:    blr
+  %res = sub <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-logical-vec.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-logical-vec.ll
new file mode 100644
index 0000000..07cb570
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-logical-vec.ll
@@ -0,0 +1,174 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -global-isel -o - < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mattr=-vsx -global-isel -o - < %s | \
+; RUN: FileCheck %s --check-prefix=NO-VSX
+
+define <16 x i8> @test_and_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_and_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxland v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_and_v16i8:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vand v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = and <16 x i8> %a, %b
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_or_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_or_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxlor v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_or_v16i8:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vor v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = or <16 x i8> %a, %b
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_xor_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_xor_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxlxor v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_xor_v16i8:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vxor v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = xor <16 x i8> %a, %b
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @test_and_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_and_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxland v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_and_v8i16:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vand v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = and <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_or_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_or_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxlor v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_or_v8i16:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vor v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = or <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_xor_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xor_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxlxor v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_xor_v8i16:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vxor v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = xor <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_and_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_and_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxland v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_and_v4i32:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vand v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = and <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_or_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_or_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxlor v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_or_v4i32:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vor v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = or <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_xor_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xor_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxlxor v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_xor_v4i32:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vxor v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = xor <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @test_and_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_and_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxland v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_and_v2i64:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vand v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = and <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_or_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_or_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxlor v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_or_v2i64:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vor v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = or <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_xor_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_xor_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxlxor v2, v2, v3
+; CHECK-NEXT:    blr
+;
+; NO-VSX-LABEL: test_xor_v2i64:
+; NO-VSX:       # %bb.0:
+; NO-VSX-NEXT:    vxor v2, v2, v3
+; NO-VSX-NEXT:    blr
+  %res = xor <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-ret-and-bitcast-vec.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-ret-and-bitcast-vec.ll
new file mode 100644
index 0000000..b0dd352
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-ret-and-bitcast-vec.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -global-isel -o - < %s | FileCheck %s
+
+; Test returning vectors in functions
+define <16 x i8> @test_ret_v16i8(<16 x i8> %a){
+; CHECK-LABEL: test_ret_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  ret <16 x i8> %a
+}
+
+define <8 x i16> @test_ret_v8i16(<8 x i16> %a){
+; CHECK-LABEL: test_ret_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  ret <8 x i16> %a
+}
+
+define <4 x i32> @test_ret_v4i32(<4 x i32> %a){
+; CHECK-LABEL: test_ret_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  ret <4 x i32> %a
+}
+
+define <2 x i64> @test_ret_v2i64(<2 x i64> %a){
+; CHECK-LABEL: test_ret_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  ret <2 x i64> %a
+}
+
+define <4 x float> @test_ret_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_ret_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  ret <4 x float> %a
+}
+
+define <2 x double> @test_ret_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_ret_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  ret <2 x double> %a
+}
+
+; Test simple bitcasting of vectors
+define <16 x i8> @test_bitcast_v16i8_v16i8(<16 x i8> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <16 x i8> %a to <16 x i8>
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v8i16(<8 x i16> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <8 x i16> %a to <16 x i8>
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v4i32(<4 x i32> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x i32> %a to <16 x i8>
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v2i64(<2 x i64> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x i64> %a to <16 x i8>
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x float> %a to <16 x i8>
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x double> %a to <16 x i8>
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <16 x i8> %a to <8 x i16>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <8 x i16> %a to <8 x i16>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x i32> %a to <8 x i16>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x i64> %a to <8 x i16>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v8i16_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x float> %a to <8 x i16>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v8i16_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x double> %a to <8 x i16>
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <16 x i8> %a to <4 x i32>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <8 x i16> %a to <4 x i32>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x i32> %a to <4 x i32>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x i64> %a to <4 x i32>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v4i32_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x float> %a to <4 x i32>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v4i32_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x double> %a to <4 x i32>
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <16 x i8> %a to <2 x i64>
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <8 x i16> %a to <2 x i64>
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x i32> %a to <2 x i64>
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x i64> %a to <2 x i64>
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v2i64_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x float> %a to <2 x i64>
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v2i64_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x double> %a to <2 x i64>
+  ret <2 x i64> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <16 x i8> %a to <4 x float>
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <8 x i16> %a to <4 x float>
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x i32> %a to <4 x float>
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x i64> %a to <4 x float>
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v4f32_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x float> %a to <4 x float>
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v4f32_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x double> %a to <4 x float>
+  ret <4 x float> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <16 x i8> %a to <2 x double>
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <8 x i16> %a to <2 x double>
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x i32> %a to <2 x double>
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x i64> %a to <2 x double>
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v2f64_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <4 x float> %a to <2 x double>
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v2f64_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    blr
+  %res = bitcast <2 x double> %a to <2 x double>
+  ret <2 x double> %res
+}
+
-- 
2.7.4