From c27734c183707247b43b215d3cf55c3dbc60ef5d Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Thu, 2 Dec 2021 15:24:06 -0600 Subject: [PATCH] [PowerPC] Fix load/store selection infrastructure when load/store intrinsics are used on P10. The load/store infrastructure previously made the incorrect assumption that whenever it is used with a load/store intrinsic on Power10, that intrinsic would be one of the paired lxvp/stxvp intrinsics introduced in Power10. However, this is not the case, as multiple pre-P10 intrinsics also use the refactored load/store implementation. This patch corrects that assumption so that pre-P10 intrinsics select the expected instructions when targeting Power10. Differential Revision: https://reviews.llvm.org/D114978 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 16 +++---- llvm/test/CodeGen/PowerPC/lxvw4x-bug.ll | 14 ++++-- llvm/test/CodeGen/PowerPC/swaps-le-8.ll | 12 +++++ llvm/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll | 56 +++++++++++++----------- llvm/test/CodeGen/PowerPC/vsx_builtins.ll | 28 +++++++----- 5 files changed, 79 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ec7e30d..67a5d66 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -17548,14 +17548,14 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N, if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) || (ParentOp == ISD::INTRINSIC_VOID))) { unsigned ID = cast(Parent->getOperand(1))->getZExtValue(); - assert( - ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) && - "Only the paired load and store (lxvp/stxvp) intrinsics are valid."); - SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) ? 
Parent->getOperand(2) - : Parent->getOperand(3); - computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG); - FlagSet |= PPC::MOF_Vector; - return FlagSet; + if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) { + SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) + ? Parent->getOperand(2) + : Parent->getOperand(3); + computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG); + FlagSet |= PPC::MOF_Vector; + return FlagSet; + } } // Mark this as something we don't want to handle here if it is atomic diff --git a/llvm/test/CodeGen/PowerPC/lxvw4x-bug.ll b/llvm/test/CodeGen/PowerPC/lxvw4x-bug.ll index 3318567..b9abb81 100644 --- a/llvm/test/CodeGen/PowerPC/lxvw4x-bug.ll +++ b/llvm/test/CodeGen/PowerPC/lxvw4x-bug.ll @@ -3,11 +3,19 @@ ; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr9 \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s \ -; RUN: | FileCheck %s --check-prefix=CHECK-P9 --implicit-check-not xxswapd +; RUN: | FileCheck %s --check-prefix=CHECK-P9UP --implicit-check-not xxswapd ; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr9 -mattr=-power9-vector \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr10 \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-P9UP + +; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr10 \ +; RUN: -mtriple=powerpc64-unknown-unknown < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-P9UP + ; Function Attrs: nounwind define void @test() { entry: @@ -26,8 +34,8 @@ entry: ; CHECK: lwa [[REG0:[0-9]+]], ; CHECK: lxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]] ; CHECK: xxswapd [[REG1]], [[REG1]] -; CHECK-P9: lwa [[REG0:[0-9]+]], -; CHECK-P9: lxvx [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]] +; CHECK-P9UP: lwa [[REG0:[0-9]+]], +; CHECK-P9UP: lxvx [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]] store <4 x i32> %4, <4 x i32>* %j, align 16 ret void } diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-8.ll b/llvm/test/CodeGen/PowerPC/swaps-le-8.ll 
index 81471b8..2874a0c2 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-8.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-8.ll @@ -1,6 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P9UP +; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr10 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P9UP define dso_local void @test(i64* %Src, i64* nocapture %Tgt) local_unnamed_addr { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry @@ -8,6 +14,12 @@ define dso_local void @test(i64* %Src, i64* nocapture %Tgt) local_unnamed_addr { ; CHECK-NEXT: xxswapd 0, 0 ; CHECK-NEXT: stxvd2x 0, 0, 4 ; CHECK-NEXT: blr +; +; CHECK-P9UP-LABEL: test: +; CHECK-P9UP: # %bb.0: # %entry +; CHECK-P9UP-NEXT: lxvd2x 0, 0, 3 +; CHECK-P9UP-NEXT: stxv 0, 0(4) +; CHECK-P9UP-NEXT: blr entry: %0 = bitcast i64* %Src to i8* %1 = tail call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %0) #2 diff --git a/llvm/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll b/llvm/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll index 0f04265..51a1d3e 100644 --- a/llvm/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll @@ -3,11 +3,15 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ -; RUN: --check-prefix=CHECK-P9 +; RUN: --check-prefix=CHECK-P9UP ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -O2 \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -O2 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P9UP + @vf = global <4 x float> , align 16 @vd = global 
<2 x double> , align 16 @vsi = global <4 x i32> , align 16 @@ -24,84 +28,84 @@ define void @test1() { entry: ; CHECK-LABEL: test1 -; CHECK-P9-LABEL: test1 +; CHECK-P9UP-LABEL: test1 ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %0 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x i32>* @vsi to i8*)) ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv store <4 x i32> %0, <4 x i32>* @res_vsi, align 16 ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %1 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x i32>* @vui to i8*)) ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv store <4 x i32> %1, <4 x i32>* @res_vui, align 16 ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x float>* @vf to i8*)) %3 = bitcast <4 x i32> %2 to <4 x float> ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv store <4 x float> %3, <4 x float>* @res_vf, align 16 ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %4 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x i64>* @vsll to i8*)) %5 = bitcast <2 x double> %4 to <2 x i64> ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv store <2 x i64> %5, <2 x i64>* @res_vsll, align 16 ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %6 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x i64>* @vull to i8*)) %7 = bitcast <2 x double> %6 to <2 x i64> ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv store <2 x i64> %7, <2 x i64>* @res_vull, align 16 ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %8 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x double>* @vd to i8*)) ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv store <2 x double> %8, <2 x double>* @res_vd, align 16 ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %9 = load <4 x i32>, <4 x i32>* @vsi, align 16 ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; 
CHECK-P9UP-DAG: stxv call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %9, i8* bitcast (<4 x i32>* @res_vsi to i8*)) ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %10 = load <4 x i32>, <4 x i32>* @vui, align 16 ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %10, i8* bitcast (<4 x i32>* @res_vui to i8*)) ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %11 = load <4 x float>, <4 x float>* @vf, align 16 %12 = bitcast <4 x float> %11 to <4 x i32> ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %12, i8* bitcast (<4 x float>* @res_vf to i8*)) ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %13 = load <2 x i64>, <2 x i64>* @vsll, align 16 %14 = bitcast <2 x i64> %13 to <2 x double> ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv call void @llvm.ppc.vsx.stxvd2x(<2 x double> %14, i8* bitcast (<2 x i64>* @res_vsll to i8*)) ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %15 = load <2 x i64>, <2 x i64>* @vull, align 16 %16 = bitcast <2 x i64> %15 to <2 x double> ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv call void @llvm.ppc.vsx.stxvd2x(<2 x double> %16, i8* bitcast (<2 x i64>* @res_vull to i8*)) ; CHECK: lxvd2x -; CHECK-P9-DAG: lxv +; CHECK-P9UP-DAG: lxv %17 = load <2 x double>, <2 x double>* @vd, align 16 ; CHECK: stxvd2x -; CHECK-P9-DAG: stxv +; CHECK-P9UP-DAG: stxv call void @llvm.ppc.vsx.stxvd2x(<2 x double> %17, i8* bitcast (<2 x double>* @res_vd to i8*)) ret void } diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll index be69c08..d039779 100644 --- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll @@ -2,7 +2,7 @@ ; RUN: llc -relocation-model=static -verify-machineinstrs -mcpu=pwr9 \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | 
FileCheck %s \ -; RUN: --check-prefixes=CHECK,CHECK-P9 +; RUN: --check-prefixes=CHECK,CHECK-P9UP ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \ @@ -14,7 +14,7 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr9 \ ; RUN: -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \ -; RUN: --check-prefixes=CHECK,CHECK-P9 +; RUN: --check-prefixes=CHECK,CHECK-P9UP ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \ ; RUN: -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \ @@ -23,6 +23,14 @@ ; RUN: -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,CHECK-INTRIN +; RUN: llc -verify-machineinstrs -mcpu=pwr10 \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,CHECK-P9UP +; RUN: llc -verify-machineinstrs -mcpu=pwr10 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,CHECK-P9UP ; Function Attrs: nounwind readnone define <4 x i32> @test1(i8* %a) { @@ -164,10 +172,10 @@ entry: ; Function Attrs: nounwind readnone define <2 x double> @test_lxvd2x(i8* %a) { -; CHECK-P9-LABEL: test_lxvd2x: -; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv v2, 0(r3) -; CHECK-P9-NEXT: blr +; CHECK-P9UP-LABEL: test_lxvd2x: +; CHECK-P9UP: # %bb.0: # %entry +; CHECK-P9UP-NEXT: lxv v2, 0(r3) +; CHECK-P9UP-NEXT: blr ; ; CHECK-NOINTRIN-LABEL: test_lxvd2x: ; CHECK-NOINTRIN: # %bb.0: # %entry @@ -188,10 +196,10 @@ declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*) ; Function Attrs: nounwind readnone define void @test_stxvd2x(<2 x double> %a, i8* %b) { -; 
CHECK-P9-LABEL: test_stxvd2x: -; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: stxv v2, 0(r5) -; CHECK-P9-NEXT: blr +; CHECK-P9UP-LABEL: test_stxvd2x: +; CHECK-P9UP: # %bb.0: # %entry +; CHECK-P9UP-NEXT: stxv v2, 0(r5) +; CHECK-P9UP-NEXT: blr ; ; CHECK-NOINTRIN-LABEL: test_stxvd2x: ; CHECK-NOINTRIN: # %bb.0: # %entry -- 2.7.4