def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
]>;
+def SDT_PPCVecInsertElt : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3>
+]>;
//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
[]>;
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
+def PPCvecinsertelt : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsertElt, []>;
//===----------------------------------------------------------------------===//
(XXBLENDVD $A, $B, $C)>;
}
+def InsertEltShift {
+ dag Sub32Left0 = (EXTRACT_SUBREG $rB, sub_32);
+ dag Sub32Left1 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 1, 0, 30);
+ dag Sub32Left2 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 2, 0, 29);
+ dag Left3 = (RLWINM8 $rB, 3, 0, 28);
+}
+
+let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
+ // Indexed vector insert element
+ def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
+ (VINSBRX $vDi, InsertEltShift.Sub32Left0, $rA)>;
+ def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
+ (VINSHRX $vDi, InsertEltShift.Sub32Left1, $rA)>;
+ def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
+ (VINSWRX $vDi, InsertEltShift.Sub32Left2, $rA)>;
+ def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
+ (VINSDRX $vDi, InsertEltShift.Left3, $rA)>;
+
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
+ (VINSWRX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
+ (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
+ (VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
+ (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
+
+ def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
+ (VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
+ def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ (VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
+ def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ (VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
+ def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ (VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
+
+ // Immediate vector insert element
+ foreach i = [0, 1, 2, 3] in {
+ def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
+ (VINSW $vDi, !mul(!sub(3, i), 4), $rA)>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
+ (VINSW $vDi, !mul(!sub(3, i), 4), (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
+ (VINSW $vDi, !mul(!sub(3, i), 4), (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
+ (VINSW $vDi, !mul(!sub(3, i), 4), (LWZX memrr:$rA))>;
+ }
+ foreach i = [0, 1] in
+ def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
+ (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
+}
+
+let Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
+ // Indexed vector insert element
+ def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
+ (VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;
+ def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
+ (VINSHLX $vDi, InsertEltShift.Sub32Left1, $rA)>;
+ def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
+ (VINSWLX $vDi, InsertEltShift.Sub32Left2, $rA)>;
+ def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
+ (VINSDLX $vDi, InsertEltShift.Left3, $rA)>;
+
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
+ (VINSWLX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
+ (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
+ (VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
+ (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
+
+ def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
+ (VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
+ def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ (VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
+ def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ (VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
+ def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ (VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
+
+ // Immediate vector insert element
+ foreach i = [0, 1, 2, 3] in {
+ def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
+ (VINSW $vDi, !mul(i, 4), $rA)>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
+ (VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
+ (VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
+ (VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
+ }
+ foreach i = [0, 1] in
+ def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
+ (VINSD $vDi, !mul(i, 8), $rA)>;
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-P9
+
+; Byte indexed
+
+define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) {
+; CHECK-LABEL: testByte:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsbrx v2, r6, r5
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testByte:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vinsblx v2, r6, r5
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testByte:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: addi r4, r1, -16
+; CHECK-P9-NEXT: clrldi r3, r6, 60
+; CHECK-P9-NEXT: stxv v2, -16(r1)
+; CHECK-P9-NEXT: stbx r5, r4, r3
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %conv = trunc i64 %b to i8
+ %vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx
+ ret <16 x i8> %vecins
+}
+
+; Halfword indexed
+
+define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
+; CHECK-LABEL: testHalf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slwi r3, r6, 1
+; CHECK-NEXT: vinshrx v2, r3, r5
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testHalf:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: slwi r3, r6, 1
+; CHECK-BE-NEXT: vinshlx v2, r3, r5
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testHalf:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: addi r4, r1, -16
+; CHECK-P9-NEXT: rlwinm r3, r6, 1, 28, 30
+; CHECK-P9-NEXT: stxv v2, -16(r1)
+; CHECK-P9-NEXT: sthx r5, r4, r3
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %conv = trunc i64 %b to i16
+ %vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx
+ ret <8 x i16> %vecins
+}
+
+; Word indexed
+
+define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
+; CHECK-LABEL: testWord:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slwi r3, r6, 2
+; CHECK-NEXT: vinswrx v2, r3, r5
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testWord:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: slwi r3, r6, 2
+; CHECK-BE-NEXT: vinswlx v2, r3, r5
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testWord:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: addi r4, r1, -16
+; CHECK-P9-NEXT: rlwinm r3, r6, 2, 28, 29
+; CHECK-P9-NEXT: stxv v2, -16(r1)
+; CHECK-P9-NEXT: stwx r5, r4, r3
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %conv = trunc i64 %b to i32
+ %vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx
+ ret <4 x i32> %vecins
+}
+
+; Word immediate
+
+define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) {
+; CHECK-LABEL: testWordImm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsw v2, r5, 8
+; CHECK-NEXT: vinsw v2, r5, 0
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testWordImm:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vinsw v2, r5, 4
+; CHECK-BE-NEXT: vinsw v2, r5, 12
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testWordImm:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: mtfprwz f0, r5
+; CHECK-P9-NEXT: xxinsertw v2, vs0, 4
+; CHECK-P9-NEXT: xxinsertw v2, vs0, 12
+; CHECK-P9-NEXT: blr
+entry:
+ %conv = trunc i64 %b to i32
+ %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1
+ %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3
+ ret <4 x i32> %vecins2
+}
+
+; Doubleword indexed
+
+define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
+; CHECK-LABEL: testDoubleword:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rlwinm r3, r6, 3, 0, 28
+; CHECK-NEXT: vinsdrx v2, r3, r5
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDoubleword:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: rlwinm r3, r6, 3, 0, 28
+; CHECK-BE-NEXT: vinsdlx v2, r3, r5
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDoubleword:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: addi r4, r1, -16
+; CHECK-P9-NEXT: rlwinm r3, r6, 3, 28, 28
+; CHECK-P9-NEXT: stxv v2, -16(r1)
+; CHECK-P9-NEXT: stdx r5, r4, r3
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx
+ ret <2 x i64> %vecins
+}
+
+; Doubleword immediate
+
+define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) {
+; CHECK-LABEL: testDoublewordImm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsd v2, r5, 0
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDoublewordImm:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vinsd v2, r5, 8
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDoublewordImm:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: mtfprd f0, r5
+; CHECK-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-P9-NEXT: blr
+entry:
+ %vecins = insertelement <2 x i64> %a, i64 %b, i32 1
+ ret <2 x i64> %vecins
+}
+
+define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) {
+; CHECK-LABEL: testDoublewordImm2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsd v2, r5, 8
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDoublewordImm2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vinsd v2, r5, 0
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDoublewordImm2:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: mtfprd f0, r5
+; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-P9-NEXT: blr
+entry:
+ %vecins = insertelement <2 x i64> %a, i64 %b, i32 0
+ ret <2 x i64> %vecins
+}
+
+; Float indexed
+
+define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
+; CHECK-LABEL: testFloat1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpspn vs0, f1
+; CHECK-NEXT: extsw r3, r6
+; CHECK-NEXT: slwi r3, r3, 2
+; CHECK-NEXT: xxsldwi vs0, vs0, vs0, 3
+; CHECK-NEXT: mffprwz r4, f0
+; CHECK-NEXT: vinswrx v2, r3, r4
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testFloat1:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xscvdpspn vs0, f1
+; CHECK-BE-NEXT: extsw r3, r6
+; CHECK-BE-NEXT: slwi r3, r3, 2
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3
+; CHECK-BE-NEXT: mffprwz r4, f0
+; CHECK-BE-NEXT: vinswlx v2, r3, r4
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testFloat1:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: addi r4, r1, -16
+; CHECK-P9-NEXT: rlwinm r3, r6, 2, 28, 29
+; CHECK-P9-NEXT: stxv v2, -16(r1)
+; CHECK-P9-NEXT: stfsx f1, r4, r3
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %vecins = insertelement <4 x float> %a, float %b, i32 %idx1
+ ret <4 x float> %vecins
+}
+
+define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
+; CHECK-LABEL: testFloat2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz r3, 0(r5)
+; CHECK-NEXT: extsw r4, r6
+; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: vinswrx v2, r4, r3
+; CHECK-NEXT: lwz r3, 1(r5)
+; CHECK-NEXT: extsw r4, r7
+; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: vinswrx v2, r4, r3
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testFloat2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lwz r3, 0(r5)
+; CHECK-BE-NEXT: extsw r4, r6
+; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: vinswlx v2, r4, r3
+; CHECK-BE-NEXT: lwz r3, 1(r5)
+; CHECK-BE-NEXT: extsw r4, r7
+; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: vinswlx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testFloat2:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lwz r3, 0(r5)
+; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29
+; CHECK-P9-NEXT: addi r6, r1, -32
+; CHECK-P9-NEXT: stxv v2, -32(r1)
+; CHECK-P9-NEXT: stwx r3, r6, r4
+; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29
+; CHECK-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-P9-NEXT: lwz r3, 1(r5)
+; CHECK-P9-NEXT: addi r5, r1, -16
+; CHECK-P9-NEXT: stxv vs0, -16(r1)
+; CHECK-P9-NEXT: stwx r3, r5, r4
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = bitcast i8* %b to float*
+ %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
+ %1 = bitcast i8* %add.ptr1 to float*
+ %2 = load float, float* %0, align 4
+ %vecins = insertelement <4 x float> %a, float %2, i32 %idx1
+ %3 = load float, float* %1, align 4
+ %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2
+ ret <4 x float> %vecins2
+}
+
+define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
+; CHECK-LABEL: testFloat3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: plwz r3, 65536(r5), 0
+; CHECK-NEXT: extsw r4, r6
+; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: vinswrx v2, r4, r3
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: extsw r4, r7
+; CHECK-NEXT: rldic r3, r3, 36, 27
+; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: lwzx r3, r5, r3
+; CHECK-NEXT: vinswrx v2, r4, r3
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testFloat3:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: plwz r3, 65536(r5), 0
+; CHECK-BE-NEXT: extsw r4, r6
+; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: vinswlx v2, r4, r3
+; CHECK-BE-NEXT: li r3, 1
+; CHECK-BE-NEXT: extsw r4, r7
+; CHECK-BE-NEXT: rldic r3, r3, 36, 27
+; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: lwzx r3, r5, r3
+; CHECK-BE-NEXT: vinswlx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testFloat3:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lis r3, 1
+; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29
+; CHECK-P9-NEXT: addi r6, r1, -32
+; CHECK-P9-NEXT: lwzx r3, r5, r3
+; CHECK-P9-NEXT: stxv v2, -32(r1)
+; CHECK-P9-NEXT: stwx r3, r6, r4
+; CHECK-P9-NEXT: li r3, 1
+; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29
+; CHECK-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-P9-NEXT: rldic r3, r3, 36, 27
+; CHECK-P9-NEXT: lwzx r3, r5, r3
+; CHECK-P9-NEXT: addi r5, r1, -16
+; CHECK-P9-NEXT: stxv vs0, -16(r1)
+; CHECK-P9-NEXT: stwx r3, r5, r4
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
+ %0 = bitcast i8* %add.ptr to float*
+ %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736
+ %1 = bitcast i8* %add.ptr1 to float*
+ %2 = load float, float* %0, align 4
+ %vecins = insertelement <4 x float> %a, float %2, i32 %idx1
+ %3 = load float, float* %1, align 4
+ %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2
+ ret <4 x float> %vecins2
+}
+
+; Float immediate
+
+define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
+; CHECK-LABEL: testFloatImm1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpspn vs0, f1
+; CHECK-NEXT: xxsldwi vs0, vs0, vs0, 3
+; CHECK-NEXT: xxinsertw v2, vs0, 12
+; CHECK-NEXT: xxinsertw v2, vs0, 4
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testFloatImm1:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xscvdpspn vs0, f1
+; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3
+; CHECK-BE-NEXT: xxinsertw v2, vs0, 0
+; CHECK-BE-NEXT: xxinsertw v2, vs0, 8
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testFloatImm1:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xscvdpspn vs0, f1
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
+; CHECK-P9-NEXT: xxinsertw v2, vs0, 0
+; CHECK-P9-NEXT: xxinsertw v2, vs0, 8
+; CHECK-P9-NEXT: blr
+entry:
+ %vecins = insertelement <4 x float> %a, float %b, i32 0
+ %vecins1 = insertelement <4 x float> %vecins, float %b, i32 2
+ ret <4 x float> %vecins1
+}
+
+define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
+; CHECK-LABEL: testFloatImm2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz r3, 0(r5)
+; CHECK-NEXT: vinsw v2, r3, 12
+; CHECK-NEXT: lwz r3, 4(r5)
+; CHECK-NEXT: vinsw v2, r3, 4
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testFloatImm2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lwz r3, 0(r5)
+; CHECK-BE-NEXT: vinsw v2, r3, 0
+; CHECK-BE-NEXT: lwz r3, 4(r5)
+; CHECK-BE-NEXT: vinsw v2, r3, 8
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testFloatImm2:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lfs f0, 0(r5)
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
+; CHECK-P9-NEXT: xxinsertw v2, vs0, 0
+; CHECK-P9-NEXT: lfs f0, 4(r5)
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
+; CHECK-P9-NEXT: xxinsertw v2, vs0, 8
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = bitcast i32* %b to float*
+ %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1
+ %1 = bitcast i32* %add.ptr1 to float*
+ %2 = load float, float* %0, align 4
+ %vecins = insertelement <4 x float> %a, float %2, i32 0
+ %3 = load float, float* %1, align 4
+ %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2
+ ret <4 x float> %vecins2
+}
+
+define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
+; CHECK-LABEL: testFloatImm3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: plwz r3, 262144(r5), 0
+; CHECK-NEXT: vinsw v2, r3, 12
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: rldic r3, r3, 38, 25
+; CHECK-NEXT: lwzx r3, r5, r3
+; CHECK-NEXT: vinsw v2, r3, 4
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testFloatImm3:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: plwz r3, 262144(r5), 0
+; CHECK-BE-NEXT: vinsw v2, r3, 0
+; CHECK-BE-NEXT: li r3, 1
+; CHECK-BE-NEXT: rldic r3, r3, 38, 25
+; CHECK-BE-NEXT: lwzx r3, r5, r3
+; CHECK-BE-NEXT: vinsw v2, r3, 8
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testFloatImm3:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lis r3, 4
+; CHECK-P9-NEXT: lfsx f0, r5, r3
+; CHECK-P9-NEXT: li r3, 1
+; CHECK-P9-NEXT: rldic r3, r3, 38, 25
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
+; CHECK-P9-NEXT: xxinsertw v2, vs0, 0
+; CHECK-P9-NEXT: lfsx f0, r5, r3
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
+; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
+; CHECK-P9-NEXT: xxinsertw v2, vs0, 8
+; CHECK-P9-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
+ %0 = bitcast i32* %add.ptr to float*
+ %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 68719476736
+ %1 = bitcast i32* %add.ptr1 to float*
+ %2 = load float, float* %0, align 4
+ %vecins = insertelement <4 x float> %a, float %2, i32 0
+ %3 = load float, float* %1, align 4
+ %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2
+ ret <4 x float> %vecins2
+}
+
+; Double indexed
+
+define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
+; CHECK-LABEL: testDouble1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: extsw r4, r6
+; CHECK-NEXT: mffprd r3, f1
+; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: vinsdrx v2, r4, r3
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDouble1:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: extsw r4, r6
+; CHECK-BE-NEXT: mffprd r3, f1
+; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: vinsdlx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDouble1:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: addi r4, r1, -16
+; CHECK-P9-NEXT: rlwinm r3, r6, 3, 28, 28
+; CHECK-P9-NEXT: stxv v2, -16(r1)
+; CHECK-P9-NEXT: stfdx f1, r4, r3
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %vecins = insertelement <2 x double> %a, double %b, i32 %idx1
+ ret <2 x double> %vecins
+}
+
+define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
+; CHECK-LABEL: testDouble2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld r3, 0(r5)
+; CHECK-NEXT: extsw r4, r6
+; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: vinsdrx v2, r4, r3
+; CHECK-NEXT: pld r3, 1(r5), 0
+; CHECK-NEXT: extsw r4, r7
+; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: vinsdrx v2, r4, r3
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDouble2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: ld r3, 0(r5)
+; CHECK-BE-NEXT: extsw r4, r6
+; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: vinsdlx v2, r4, r3
+; CHECK-BE-NEXT: pld r3, 1(r5), 0
+; CHECK-BE-NEXT: extsw r4, r7
+; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: vinsdlx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDouble2:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: ld r3, 0(r5)
+; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28
+; CHECK-P9-NEXT: addi r6, r1, -32
+; CHECK-P9-NEXT: stxv v2, -32(r1)
+; CHECK-P9-NEXT: stdx r3, r6, r4
+; CHECK-P9-NEXT: li r3, 1
+; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28
+; CHECK-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-P9-NEXT: ldx r3, r5, r3
+; CHECK-P9-NEXT: addi r5, r1, -16
+; CHECK-P9-NEXT: stxv vs0, -16(r1)
+; CHECK-P9-NEXT: stdx r3, r5, r4
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = bitcast i8* %b to double*
+ %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
+ %1 = bitcast i8* %add.ptr1 to double*
+ %2 = load double, double* %0, align 8
+ %vecins = insertelement <2 x double> %a, double %2, i32 %idx1
+ %3 = load double, double* %1, align 8
+ %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2
+ ret <2 x double> %vecins2
+}
+
+define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
+; CHECK-LABEL: testDouble3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, 65536(r5), 0
+; CHECK-NEXT: extsw r4, r6
+; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: vinsdrx v2, r4, r3
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: extsw r4, r7
+; CHECK-NEXT: rldic r3, r3, 36, 27
+; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: ldx r3, r5, r3
+; CHECK-NEXT: vinsdrx v2, r4, r3
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDouble3:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: pld r3, 65536(r5), 0
+; CHECK-BE-NEXT: extsw r4, r6
+; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: vinsdlx v2, r4, r3
+; CHECK-BE-NEXT: li r3, 1
+; CHECK-BE-NEXT: extsw r4, r7
+; CHECK-BE-NEXT: rldic r3, r3, 36, 27
+; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: ldx r3, r5, r3
+; CHECK-BE-NEXT: vinsdlx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDouble3:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lis r3, 1
+; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28
+; CHECK-P9-NEXT: addi r6, r1, -32
+; CHECK-P9-NEXT: ldx r3, r5, r3
+; CHECK-P9-NEXT: stxv v2, -32(r1)
+; CHECK-P9-NEXT: stdx r3, r6, r4
+; CHECK-P9-NEXT: li r3, 1
+; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28
+; CHECK-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-P9-NEXT: rldic r3, r3, 36, 27
+; CHECK-P9-NEXT: ldx r3, r5, r3
+; CHECK-P9-NEXT: addi r5, r1, -16
+; CHECK-P9-NEXT: stxv vs0, -16(r1)
+; CHECK-P9-NEXT: stdx r3, r5, r4
+; CHECK-P9-NEXT: lxv v2, -16(r1)
+; CHECK-P9-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
+ %0 = bitcast i8* %add.ptr to double*
+ %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736
+ %1 = bitcast i8* %add.ptr1 to double*
+ %2 = load double, double* %0, align 8
+ %vecins = insertelement <2 x double> %a, double %2, i32 %idx1
+ %3 = load double, double* %1, align 8
+ %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2
+ ret <2 x double> %vecins2
+}
+
+; Double immediate
+
+define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
+; CHECK-LABEL: testDoubleImm1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-NEXT: xxmrghd v2, v2, vs1
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDoubleImm1:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-BE-NEXT: xxpermdi v2, vs1, v2, 1
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDoubleImm1:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-P9-NEXT: xxpermdi v2, vs1, v2, 1
+; CHECK-P9-NEXT: blr
+entry:
+ %vecins = insertelement <2 x double> %a, double %b, i32 0
+ ret <2 x double> %vecins
+}
+
+define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) {
+; CHECK-LABEL: testDoubleImm2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd f0, 0(r5)
+; CHECK-NEXT: xxmrghd v2, v2, vs0
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDoubleImm2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lfd f0, 0(r5)
+; CHECK-BE-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDoubleImm2:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lfd f0, 0(r5)
+; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = bitcast i32* %b to double*
+ %1 = load double, double* %0, align 8
+ %vecins = insertelement <2 x double> %a, double %1, i32 0
+ ret <2 x double> %vecins
+}
+
+define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) {
+; CHECK-LABEL: testDoubleImm3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd f0, 4(r5)
+; CHECK-NEXT: xxmrghd v2, v2, vs0
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDoubleImm3:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lfd f0, 4(r5)
+; CHECK-BE-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDoubleImm3:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lfd f0, 4(r5)
+; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-P9-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %b, i64 1
+ %0 = bitcast i32* %add.ptr to double*
+ %1 = load double, double* %0, align 8
+ %vecins = insertelement <2 x double> %a, double %1, i32 0
+ ret <2 x double> %vecins
+}
+
+define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) {
+; CHECK-LABEL: testDoubleImm4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lis r3, 4
+; CHECK-NEXT: lfdx f0, r5, r3
+; CHECK-NEXT: xxmrghd v2, v2, vs0
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDoubleImm4:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lis r3, 4
+; CHECK-BE-NEXT: lfdx f0, r5, r3
+; CHECK-BE-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDoubleImm4:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: lis r3, 4
+; CHECK-P9-NEXT: lfdx f0, r5, r3
+; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-P9-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
+ %0 = bitcast i32* %add.ptr to double*
+ %1 = load double, double* %0, align 8
+ %vecins = insertelement <2 x double> %a, double %1, i32 0
+ ret <2 x double> %vecins
+}
+
+define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) {
+; CHECK-LABEL: testDoubleImm5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: rldic r3, r3, 38, 25
+; CHECK-NEXT: lfdx f0, r5, r3
+; CHECK-NEXT: xxmrghd v2, v2, vs0
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testDoubleImm5:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: li r3, 1
+; CHECK-BE-NEXT: rldic r3, r3, 38, 25
+; CHECK-BE-NEXT: lfdx f0, r5, r3
+; CHECK-BE-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testDoubleImm5:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: li r3, 1
+; CHECK-P9-NEXT: rldic r3, r3, 38, 25
+; CHECK-P9-NEXT: lfdx f0, r5, r3
+; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-P9-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736
+ %0 = bitcast i32* %add.ptr to double*
+ %1 = load double, double* %0, align 8
+ %vecins = insertelement <2 x double> %a, double %1, i32 0
+ ret <2 x double> %vecins
+}
+