From 7486de1b2eced2cccc7b0b95598e9ab45039d700 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 29 Dec 2020 06:32:45 -0600 Subject: [PATCH] [PowerPC] Provide patterns for permuted scalar to vector for pre-P8 We will emit these permuted nodes on all VSX little endian subtargets but don't have the patterns available to match them on subtargets that don't have direct moves. Fixes: https://bugs.llvm.org/show_bug.cgi?id=47916 --- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 14 ++ llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 7 +- .../CodeGen/PowerPC/canonical-merge-shuffles.ll | 149 +++++++++++++++++++-- llvm/test/CodeGen/PowerPC/load-and-splat.ll | 3 +- llvm/test/CodeGen/PowerPC/pr47916.ll | 17 +++ 5 files changed, 177 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr47916.ll diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 4e08636..136a53e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2412,6 +2412,7 @@ def MrgWords { // [HasVSX, IsBigEndian] // [HasVSX, IsLittleEndian] // [HasVSX, NoP9Vector] +// [HasVSX, NoP9Vector, IsLittleEndian] // [HasVSX, HasOnlySwappingMemOps] // [HasVSX, HasOnlySwappingMemOps, IsBigEndian] // [HasVSX, HasP8Vector] @@ -3005,6 +3006,19 @@ defm : ScalToVecWPermute< VSFRC)), sub_64)>; } // HasVSX, NoP9Vector +// Any little endian pre-Power9 VSX subtarget. +let Predicates = [HasVSX, NoP9Vector, IsLittleEndian] in { +// Load-and-splat using only X-Form VSX loads. +defm : ScalToVecWPermute< + v2i64, (i64 (load xoaddr:$src)), + (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; +defm : ScalToVecWPermute< + v2f64, (f64 (load xoaddr:$src)), + (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; +} // HasVSX, NoP9Vector, IsLittleEndian + // Any VSX subtarget that only has loads and stores that load in big endian // order regardless of endianness. This is really pre-Power9 subtargets. let Predicates = [HasVSX, HasOnlySwappingMemOps] in { diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index b7ed8ce..ff251f5 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -254,10 +254,11 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (isAnyVecReg(Reg, Partial)) { + // All operands need to be checked because there are instructions that + // operate on a partial register and produce a full register (such as + // XXPERMDIs). + if (isAnyVecReg(Reg, Partial)) RelevantInstr = true; - break; - } } if (!RelevantInstr) diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index cdd04b3..35b590d 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -8,6 +8,9 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr8 -mattr=-vsx -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-NOVSX +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-P7 define dso_local <16 x i8> @testmrghb(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrghb: @@ -24,6 +27,11 @@ define dso_local <16 x i8> @testmrghb(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrghb v2, v3, v2 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrghb: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrghb v2, v3, v2 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -46,6 +54,11 @@ define dso_local <16 x i8> @testmrghb2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-NOVSX-NEXT: lvx v4, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v3, v2, v4 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrghb2: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrghb v2, v2, v3 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -65,6 +78,11 @@ define dso_local <16 x i8> @testmrghh(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrghh v2, v3, v2 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrghh: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrghh v2, v3, v2 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -87,6 +105,11 @@ define dso_local <16 x i8> @testmrghh2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-NOVSX-NEXT: lvx v4, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v3, v2, v4 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrghh2: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrghh v2, v2, v3 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -106,6 +129,11 @@ define dso_local <16 x i8> @testmrglb(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrglb v2, v3, v2 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrglb: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrglb v2, v3, v2 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -128,6 +156,11 @@ define dso_local <16 x i8> @testmrglb2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-NOVSX-NEXT: lvx v4, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v3, v2, v4 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrglb2: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrglb v2, v2, v3 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -147,6 +180,11 @@ define dso_local <16 x i8> @testmrglh(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrglh v2, v3, v2 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrglh: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrglh v2, v3, v2 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -169,6 +207,11 @@ define dso_local <16 x i8> @testmrglh2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-NOVSX-NEXT: lvx v4, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v3, v2, v4 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrglh2: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrglh v2, v2, v3 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -188,6 +231,11 @@ define dso_local <16 x i8> @testmrghw(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrghw v2, v3, v2 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrghw: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrghw v2, v3, v2 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -210,6 +258,11 @@ define dso_local <16 x i8> @testmrghw2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-NOVSX-NEXT: lvx v4, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v3, v2, v4 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrghw2: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrghw v2, v2, v3 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -229,6 +282,11 @@ define dso_local <16 x i8> @testmrglw(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrglw v2, v3, v2 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrglw: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrglw v2, v3, v2 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -251,6 +309,11 @@ define dso_local <16 x i8> @testmrglw2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-NOVSX-NEXT: lvx v4, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v3, v2, v4 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrglw2: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: vmrglw v2, v2, v3 +; CHECK-P7-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -259,10 +322,9 @@ entry: define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrglb3: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: ld r3, 0(r3) -; CHECK-P8-NEXT: xxlxor v2, v2, v2 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-P8-NEXT: vmrghb v2, v3, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testmrglb3: @@ -284,6 +346,13 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un ; CHECK-NOVSX-NEXT: lvx v4, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v4, v2, v3 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testmrglb3: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: lxsdx v2, 0, r3 +; CHECK-P7-NEXT: xxlxor v3, v3, v3 +; CHECK-P7-NEXT: vmrghb v2, v3, v2 +; CHECK-P7-NEXT: blr entry: %0 = load <8 x i8>, <8 x i8>* %a, align 8 %1 = zext <8 x i8> %0 to <8 x i16> @@ -332,6 +401,21 @@ define dso_local void @no_crash_elt0_from_RHS(<2 x double>* noalias nocapture de ; CHECK-NOVSX-NEXT: li r3, 0 ; CHECK-NOVSX-NEXT: stfd f1, 8(r30) ; CHECK-NOVSX-NEXT: std r3, 0(r30) +; +; CHECK-P7-LABEL: no_crash_elt0_from_RHS: +; CHECK-P7: # %bb.0: # %test_entry +; CHECK-P7-NEXT: mflr r0 +; CHECK-P7-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P7-NEXT: std r0, 16(r1) +; CHECK-P7-NEXT: stdu r1, -48(r1) +; CHECK-P7-NEXT: mr r30, r3 +; CHECK-P7-NEXT: bl dummy +; CHECK-P7-NEXT: nop +; CHECK-P7-NEXT: xxlxor f0, f0, f0 +; CHECK-P7-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-P7-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-P7-NEXT: xxswapd vs0, vs0 +; CHECK-P7-NEXT: stxvd2x vs0, 0, r30 test_entry: %_div_result = tail call double @dummy() %oldret = insertvalue { double, double } undef, double %_div_result, 0 @@ -364,6 +448,17 @@ define dso_local <16 x i8> @no_crash_bitcast(i32 %a) { ; CHECK-NOVSX-NEXT: lvx v2, 0, r4 ; CHECK-NOVSX-NEXT: vperm v2, v3, v3, v2 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: no_crash_bitcast: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: addis r4, r2, .LCPI14_0@toc@ha +; CHECK-P7-NEXT: stw r3, -16(r1) +; CHECK-P7-NEXT: addi r3, r1, -16 +; CHECK-P7-NEXT: addi r4, r4, .LCPI14_0@toc@l +; CHECK-P7-NEXT: lvx v3, 0, r3 +; CHECK-P7-NEXT: lvx v2, 0, r4 +; CHECK-P7-NEXT: vperm v2, v3, v3, v2 +; CHECK-P7-NEXT: blr entry: %cast = bitcast i32 %a to <4 x i8> %ret = shufflevector <4 x i8> %cast, <4 x i8> undef, <16 x i32> @@ -397,6 +492,17 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_ ; CHECK-NOVSX-NEXT: lvx v4, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v4, v2, v3 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: replace_undefs_in_splat: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; CHECK-P7-NEXT: addis r4, r2, .LCPI15_1@toc@ha +; CHECK-P7-NEXT: addi r3, r3, .LCPI15_0@toc@l +; CHECK-P7-NEXT: lvx v3, 0, r3 +; CHECK-P7-NEXT: addi r3, r4, .LCPI15_1@toc@l +; CHECK-P7-NEXT: lvx v4, 0, r3 +; CHECK-P7-NEXT: vperm v2, v4, v2, v3 +; CHECK-P7-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> %a, <4 x i32> , <4 x i32> ret <4 x i32> %vecins1 @@ -435,6 +541,18 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(i32* nocapture re ; CHECK-NOVSX-NEXT: lvx v3, 0, r3 ; CHECK-NOVSX-NEXT: vmrglb v2, v2, v3 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: no_RAUW_in_combine_during_legalize: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: sldi r4, r4, 2 +; CHECK-P7-NEXT: addi r5, r1, -16 +; CHECK-P7-NEXT: xxlxor v3, v3, v3 +; CHECK-P7-NEXT: lwzx r3, r3, r4 +; CHECK-P7-NEXT: std r3, -16(r1) +; CHECK-P7-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P7-NEXT: xxswapd v2, vs0 +; CHECK-P7-NEXT: vmrglb v2, v3, v2 +; CHECK-P7-NEXT: blr entry: %idx.ext = sext i32 %offset to i64 %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 %idx.ext @@ -449,8 +567,7 @@ entry: define dso_local <4 x i32> @testSplat4Low(<8 x i8>* nocapture readonly %ptr) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testSplat4Low: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: ld r3, 0(r3) -; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xxspltw v2, vs0, 0 ; CHECK-P8-NEXT: blr ; @@ -468,6 +585,12 @@ define dso_local <4 x i32> @testSplat4Low(<8 x i8>* nocapture readonly %ptr) loc ; CHECK-NOVSX-NEXT: lvx v2, 0, r4 ; CHECK-NOVSX-NEXT: vspltw v2, v2, 2 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testSplat4Low: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: lfdx f0, 0, r3 +; CHECK-P7-NEXT: xxspltw v2, vs0, 0 +; CHECK-P7-NEXT: blr entry: %0 = load <8 x i8>, <8 x i8>* %ptr, align 8 %vecinit18 = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> @@ -479,8 +602,7 @@ entry: define dso_local <4 x i32> @testSplat4hi(<8 x i8>* nocapture readonly %ptr) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testSplat4hi: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: ld r3, 0(r3) -; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: lfdx f0, 0, r3 ; CHECK-P8-NEXT: xxspltw v2, vs0, 1 ; CHECK-P8-NEXT: blr ; @@ -497,6 +619,12 @@ define dso_local <4 x i32> @testSplat4hi(<8 x i8>* nocapture readonly %ptr) loca ; CHECK-NOVSX-NEXT: lvx v2, 0, r4 ; CHECK-NOVSX-NEXT: vspltw v2, v2, 3 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testSplat4hi: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: lfdx f0, 0, r3 +; CHECK-P7-NEXT: xxspltw v2, vs0, 1 +; CHECK-P7-NEXT: blr entry: %0 = load <8 x i8>, <8 x i8>* %ptr, align 8 %vecinit22 = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> @@ -527,6 +655,11 @@ define dso_local <2 x i64> @testSplat8(<8 x i8>* nocapture readonly %ptr) local_ ; CHECK-NOVSX-NEXT: lvx v3, 0, r3 ; CHECK-NOVSX-NEXT: vperm v2, v3, v3, v2 ; CHECK-NOVSX-NEXT: blr +; +; CHECK-P7-LABEL: testSplat8: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: lxvdsx v2, 0, r3 +; CHECK-P7-NEXT: blr entry: %0 = load <8 x i8>, <8 x i8>* %ptr, align 8 %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll index 26da1fd..c011e45 100644 --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -126,8 +126,7 @@ define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) { ; ; P8-LABEL: adjusted_lxvwsx: ; P8: # %bb.0: # %entry -; P8-NEXT: ld r3, 0(r3) -; P8-NEXT: mtfprd f0, r3 +; P8-NEXT: lfdx f0, 0, r3 ; P8-NEXT: xxspltw v2, vs0, 0 ; P8-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/pr47916.ll b/llvm/test/CodeGen/PowerPC/pr47916.ll new file mode 100644 index 0000000..f42694b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr47916.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s +define dso_local void @_Z1jjPiPj() local_unnamed_addr #0 { +; CHECK-LABEL: _Z1jjPiPj: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxsdx v2, 0, r3 +; CHECK-NEXT: vmrghw v2, v2, v2 +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: stxvd2x vs0, 0, r3 +; CHECK-NEXT: blr +entry: + %wide.load42 = load <2 x i32>, <2 x i32>* undef, align 4 + %interleaved.vec49 = shufflevector <2 x i32> %wide.load42, <2 x i32> undef, <4 x i32> + store <4 x i32> %interleaved.vec49, <4 x i32>* undef, align 4 + ret void +} -- 2.7.4