From f57f150b1b4c357ca50291d17f16dfb5dd735527 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 29 Nov 2016 23:00:33 +0000 Subject: [PATCH] Revert https://reviews.llvm.org/rL287679 This commit caused some miscompiles that did not show up on any of the bots. Reverting until we can investigate the cause of those failures. llvm-svn: 288214 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 15 ---------- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 14 ++++----- llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll | 20 ++++++------- llvm/test/CodeGen/PowerPC/swaps-le-1.ll | 46 +++++++++++++---------------- llvm/test/CodeGen/PowerPC/swaps-le-2.ll | 31 ++++++++----------- llvm/test/CodeGen/PowerPC/vsx-ldst.ll | 6 ++-- llvm/test/CodeGen/PowerPC/vsx.ll | 39 ++++++++++-------------- 7 files changed, 65 insertions(+), 106 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 261e060..4797fe3 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10861,14 +10861,6 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, } MVT VecTy = N->getValueType(0).getSimpleVT(); - - // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is - // aligned and the type is a vector with elements up to 4 bytes - if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) - && VecTy.getScalarSizeInBits() <= 32 ) { - return SDValue(); - } - SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, DAG.getVTList(MVT::v2f64, MVT::Other), @@ -10933,13 +10925,6 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); - // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is - // aligned and the type is a vector with elements up to 4 bytes - if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) - && VecTy.getScalarSizeInBits() <= 32 ) { - return SDValue(); - } - // All stores are done as v2f64 and possible bit cast. if (VecTy != MVT::v2f64) { Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index f203c67..b0c1bcc 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -138,7 +138,7 @@ let Uses = [RM] in { def LXVW4X : XX1Form<31, 780, (outs vsrc:$XT), (ins memrr:$src), "lxvw4x $XT, $src", IIC_LdStLFD, - []>; + [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>; } // mayLoad // Store indexed instructions @@ -160,7 +160,7 @@ let Uses = [RM] in { def STXVW4X : XX1Form<31, 908, (outs), (ins vsrc:$XT, memrr:$dst), "stxvw4x $XT, $dst", IIC_LdStSTFD, - []>; + [(store v4i32:$XT, xoaddr:$dst)]>; } } // mayStore @@ -1045,6 +1045,8 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in { // Stores. def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), @@ -1055,12 +1057,8 @@ let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; } // Permutes. @@ -1884,8 +1882,8 @@ let Predicates = [IsLittleEndian, HasVSX] in def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; -def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; -def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) diff --git a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll index 4a8fd90..924e04a 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll @@ -1,6 +1,5 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE \ -; RUN: --implicit-check-not xxswapd +; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE @@ -9,15 +8,13 @@ ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \ -; RUN: --implicit-check-not xxswapd +; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr8 -mattr=-vsx < %s | \ -; RUN: FileCheck %s -check-prefix=CHECK-LE-NOVSX --implicit-check-not xxswapd +; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr9 -ppc-vsr-nums-as-vr < %s | FileCheck %s \ @@ -29,7 +26,7 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \ -; RUN: FileCheck %s -check-prefix=CHECK-LE --implicit-check-not xxswapd +; RUN: FileCheck %s -check-prefix=CHECK-LE @x = common global <1 x i128> zeroinitializer, align 16 @y = common global <1 x i128> zeroinitializer, align 16 @@ -202,7 +199,8 @@ define <1 x i128> @call_v1i128_increment_by_one() nounwind { ret <1 x i128> %ret ; CHECK-LE-LABEL: @call_v1i128_increment_by_one -; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE: lxvd2x [[PARAM:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE: xxswapd 34, [[PARAM]] ; CHECK-LE: bl v1i128_increment_by_one ; CHECK-LE: blr @@ -231,8 +229,10 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind { ret <1 x i128> %ret ; CHECK-LE-LABEL: @call_v1i128_increment_by_val -; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK-LE: lvx 3, {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE: lxvd2x [[PARAM1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE: lxvd2x [[PARAM2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE-DAG: xxswapd 34, [[PARAM1]] +; CHECK-LE-DAG: xxswapd 35, [[PARAM2]] ; CHECK-LE: bl v1i128_increment_by_val ; CHECK-LE: blr diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll index 29c94f8..cb83bf2 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll @@ -13,12 +13,6 @@ ; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \ ; RUN: | FileCheck -check-prefix=NOOPTSWAP %s -; LH: 2016-11-17 -; Updated align attritue from 16 to 8 to keep swap instructions tests. -; Changes have been made on little-endian to use lvx and stvx -; instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for -; aligned vectors with elements up to 4 bytes - ; This test was generated from the following source: ; ; #define N 4096 @@ -35,10 +29,10 @@ ; } ; } -@cb = common global [4096 x i32] zeroinitializer, align 8 -@cc = common global [4096 x i32] zeroinitializer, align 8 -@cd = common global [4096 x i32] zeroinitializer, align 8 -@ca = common global [4096 x i32] zeroinitializer, align 8 +@cb = common global [4096 x i32] zeroinitializer, align 16 +@cc = common global [4096 x i32] zeroinitializer, align 16 +@cd = common global [4096 x i32] zeroinitializer, align 16 +@ca = common global [4096 x i32] zeroinitializer, align 16 define void @foo() { entry: @@ -48,63 +42,63 @@ vector.body: %index = phi i64 [ 0, %entry ], [ %index.next.3, %vector.body ] %0 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 8 + %wide.load = load <4 x i32>, <4 x i32>* %1, align 16 %2 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index %3 = bitcast i32* %2 to <4 x i32>* - %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 8 + %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 16 %4 = add nsw <4 x i32> %wide.load13, %wide.load %5 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index %6 = bitcast i32* %5 to <4 x i32>* - %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 8 + %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 16 %7 = mul nsw <4 x i32> %4, %wide.load14 %8 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index %9 = bitcast i32* %8 to <4 x i32>* - store <4 x i32> %7, <4 x i32>* %9, align 8 + store <4 x i32> %7, <4 x i32>* %9, align 16 %index.next = add nuw nsw i64 %index, 4 %10 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next %11 = bitcast i32* %10 to <4 x i32>* - %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 8 + %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 16 %12 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next %13 = bitcast i32* %12 to <4 x i32>* - %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 8 + %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 16 %14 = add nsw <4 x i32> %wide.load13.1, %wide.load.1 %15 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next %16 = bitcast i32* %15 to <4 x i32>* - %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 8 + %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 16 %17 = mul nsw <4 x i32> %14, %wide.load14.1 %18 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next %19 = bitcast i32* %18 to <4 x i32>* - store <4 x i32> %17, <4 x i32>* %19, align 8 + store <4 x i32> %17, <4 x i32>* %19, align 16 %index.next.1 = add nuw nsw i64 %index.next, 4 %20 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.1 %21 = bitcast i32* %20 to <4 x i32>* - %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 8 + %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 16 %22 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.1 %23 = bitcast i32* %22 to <4 x i32>* - %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 8 + %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 16 %24 = add nsw <4 x i32> %wide.load13.2, %wide.load.2 %25 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.1 %26 = bitcast i32* %25 to <4 x i32>* - %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 8 + %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 16 %27 = mul nsw <4 x i32> %24, %wide.load14.2 %28 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.1 %29 = bitcast i32* %28 to <4 x i32>* - store <4 x i32> %27, <4 x i32>* %29, align 8 + store <4 x i32> %27, <4 x i32>* %29, align 16 %index.next.2 = add nuw nsw i64 %index.next.1, 4 %30 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.2 %31 = bitcast i32* %30 to <4 x i32>* - %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 8 + %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 16 %32 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.2 %33 = bitcast i32* %32 to <4 x i32>* - %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 8 + %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 16 %34 = add nsw <4 x i32> %wide.load13.3, %wide.load.3 %35 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.2 %36 = bitcast i32* %35 to <4 x i32>* - %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 8 + %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 16 %37 = mul nsw <4 x i32> %34, %wide.load14.3 %38 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.2 %39 = bitcast i32* %38 to <4 x i32>* - store <4 x i32> %37, <4 x i32>* %39, align 8 + store <4 x i32> %37, <4 x i32>* %39, align 16 %index.next.3 = add nuw nsw i64 %index.next.2, 4 %40 = icmp eq i64 %index.next.3, 4096 br i1 %40, label %for.end, label %vector.body diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-2.ll b/llvm/test/CodeGen/PowerPC/swaps-le-2.ll index e7751a1..0963b92 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-2.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-2.ll @@ -2,13 +2,6 @@ ; Test swap removal when a vector splat must be adjusted to make it legal. ; - -; LH: 2016-11-17 -; Updated align attritue from 16 to 8 to keep swap instructions tests. -; Changes have been made on little-endian to use lvx and stvx -; instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for -; aligned vectors with elements up to 4 bytes - ; Test generated from following C code: ; ; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; @@ -35,37 +28,37 @@ ; vir = (vector int){vi[1], vi[1], vi[1], vi[1]}; ; } -@vc = global <16 x i8> , align 8 -@vs = global <8 x i16> , align 8 -@vi = global <4 x i32> , align 8 -@vcr = common global <16 x i8> zeroinitializer, align 8 -@vsr = common global <8 x i16> zeroinitializer, align 8 -@vir = common global <4 x i32> zeroinitializer, align 8 +@vc = global <16 x i8> , align 16 +@vs = global <8 x i16> , align 16 +@vi = global <4 x i32> , align 16 +@vcr = common global <16 x i8> zeroinitializer, align 16 +@vsr = common global <8 x i16> zeroinitializer, align 16 +@vir = common global <4 x i32> zeroinitializer, align 16 ; Function Attrs: nounwind define void @cfoo() { entry: - %0 = load <16 x i8>, <16 x i8>* @vc, align 8 + %0 = load <16 x i8>, <16 x i8>* @vc, align 16 %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> - store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 8 + store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 16 ret void } ; Function Attrs: nounwind define void @sfoo() { entry: - %0 = load <8 x i16>, <8 x i16>* @vs, align 8 + %0 = load <8 x i16>, <8 x i16>* @vs, align 16 %vecinit14 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> - store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 8 + store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 16 ret void } ; Function Attrs: nounwind define void @ifoo() { entry: - %0 = load <4 x i32>, <4 x i32>* @vi, align 8 + %0 = load <4 x i32>, <4 x i32>* @vi, align 16 %vecinit6 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> - store <4 x i32> %vecinit6, <4 x i32>* @vir, align 8 + store <4 x i32> %vecinit6, <4 x i32>* @vir, align 16 ret void } diff --git a/llvm/test/CodeGen/PowerPC/vsx-ldst.ll b/llvm/test/CodeGen/PowerPC/vsx-ldst.ll index d8dd635..a146182 100644 --- a/llvm/test/CodeGen/PowerPC/vsx-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-ldst.ll @@ -14,10 +14,8 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t -; RUN: grep lxvd2x < %t | count 3 -; RUN: grep lvx < %t | count 3 -; RUN: grep stxvd2x < %t | count 3 -; RUN: grep stvx < %t | count 3 +; RUN: grep lxvd2x < %t | count 6 +; RUN: grep stxvd2x < %t | count 6 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index f1a165c..7bd4186 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -1,17 +1,8 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr7 \ -; RUN: -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 \ -; RUN: -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | \ -; RUN: FileCheck -check-prefix=CHECK-REG %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 \ -; RUN: -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s |\ -; RUN: FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 \ -; RUN: -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s |\ -; RUN: FileCheck -check-prefix=CHECK-FISL %s -; RUN: llc -verify-machineinstrs -mcpu=pwr8 \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mattr=+vsx < %s | \ -; RUN: FileCheck -check-prefix=CHECK-LE %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-LE %s define double @test1(double %a, double %b) { entry: @@ -654,8 +645,8 @@ define <4 x float> @test32(<4 x float>* %a) { ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test32 -; CHECK-LE: lvx 2, 0, 3 -; CHECK-LE-NOT: xxswapd +; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3 +; CHECK-LE: xxswapd 34, [[V1]] ; CHECK-LE: blr } @@ -672,8 +663,8 @@ define void @test33(<4 x float>* %a, <4 x float> %b) { ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test33 -; CHECK-LE-NOT: xxswapd -; CHECK-LE: stvx 2, 0, 3 +; CHECK-LE: xxswapd [[V1:[0-9]+]], 34 +; CHECK-LE: stxvd2x [[V1]], 0, 3 ; CHECK-LE: blr } @@ -725,8 +716,8 @@ define <4 x i32> @test34(<4 x i32>* %a) { ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test34 -; CHECK-LE: lvx 2, 0, 3 -; CHECK-LE-NOT: xxswapd +; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3 +; CHECK-LE: xxswapd 34, [[V1]] ; CHECK-LE: blr } @@ -743,8 +734,8 @@ define void @test35(<4 x i32>* %a, <4 x i32> %b) { ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test35 -; CHECK-LE-NOT: xxswapd -; CHECK-LE: stvx 2, 0, 3 +; CHECK-LE: xxswapd [[V1:[0-9]+]], 34 +; CHECK-LE: stxvd2x [[V1]], 0, 3 ; CHECK-LE: blr } @@ -1159,9 +1150,9 @@ define <2 x i32> @test80(i32 %v) { ; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3 ; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]] ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI -; CHECK-LE-DAG: lvx 3, 0, [[R2]] +; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]] ; CHECK-LE-DAG: xxspltw 34, [[V1]] -; CHECK-LE-NOT: xxswapd 35, [[V2]] +; CHECK-LE-DAG: xxswapd 35, [[V2]] ; CHECK-LE: vadduwm 2, 2, 3 ; CHECK-LE: blr } -- 2.7.4