From ff769dd11128839e00eea546f7e68680d9acfd77 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Fri, 16 Apr 2021 18:51:07 -0500 Subject: [PATCH] [PowerPC] Minor improvement for insert_vector_elt codegen For v2f64, all VSX subtargets can insert an element with a single XXPERMDI. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 10 +- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 8 ++ llvm/test/CodeGen/PowerPC/swaps-le-6.ll | 147 ++++++++++++--------- llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll | 38 +++++- 4 files changed, 127 insertions(+), 76 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 48dba75..d4efb2b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -878,6 +878,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); } if (Subtarget.hasP8Altivec()) @@ -1247,10 +1248,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); } - if (Subtarget.isISA3_1()) { + if (Subtarget.isISA3_1()) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); - } } if (Subtarget.pairedVectorMemops()) { @@ -10341,6 +10340,9 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SDValue V2 = Op.getOperand(1); SDValue V3 = Op.getOperand(2); + if (VT == MVT::v2f64 && C) + return Op; + if (Subtarget.isISA3_1()) { // On P10, we have legal lowering for constant and variable indices for // integer vectors. 
@@ -10353,7 +10355,7 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, if (VT == MVT::v4f32 || VT == MVT::v2f64) { if (!C || (VT == MVT::v4f32 && dyn_cast<LoadSDNode>(V2))) return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3); - return SDValue(); + return Op; } } diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 471ab32..869e06c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2907,6 +2907,10 @@ def : Pat; def : Pat; +def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)), + (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>; +def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)), + (v2f64 (XXPERMDI $A, (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>; } // HasVSX, IsBigEndian // Any little endian VSX subtarget. @@ -3012,6 +3016,10 @@ def : Pat; def : Pat; +def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)), + (v2f64 (XXPERMDI $A, (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>; +def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)), + (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>; } // HasVSX, IsLittleEndian // Any pre-Power9 VSX subtarget. 
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll index 4437e67..e3934ed 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -relocation-model=pic -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \ ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -O3 < %s | FileCheck %s @@ -21,41 +22,48 @@ define void @bar0() { ; CHECK-LABEL: bar0: -; CHECK: # %bb.0: # %entry -; CHECK: addis r3, r2, .LC0@toc@ha -; CHECK: addis r4, r2, .LC1@toc@ha -; CHECK: ld r3, .LC0@toc@l(r3) -; CHECK: addis r3, r2, .LC2@toc@ha -; CHECK: ld r3, .LC2@toc@l(r3) -; CHECK: xxmrgld vs0, vs0, vs1 -; CHECK: stxvd2x vs0, 0, r3 -; CHECK: blr -; -; CHECK-P9-NOVECTOR-LABEL: bar0: -; CHECK-P9-NOVECTOR: # %bb.0: # %entry -; CHECK-P9-NOVECTOR: addis r3, r2, .LC0@toc@ha -; CHECK-P9-NOVECTOR: ld r3, .LC0@toc@l(r3) -; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha -; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha -; CHECK-P9-NOVECTOR: ld r3, .LC2@toc@l(r3) -; CHECK-P9-NOVECTOR: xxmrgld vs0, vs1, vs0 -; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3 -; CHECK-P9-NOVECTOR: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: addis r4, r2, .LC1@toc@ha +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: lfdx f0, 0, r3 +; CHECK-NEXT: ld r3, .LC1@toc@l(r4) +; CHECK-NEXT: lxvd2x vs1, 0, r3 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: addis r3, r2, .LC2@toc@ha +; CHECK-NEXT: ld r3, .LC2@toc@l(r3) +; CHECK-NEXT: xxmrgld vs0, vs0, vs1 +; CHECK-NEXT: stxvd2x vs0, 0, r3 +; CHECK-NEXT: blr ; ; CHECK-P9-LABEL: bar0: -; CHECK-P9: # %bb.0: # %entry -; CHECK-P9: addis r3, r2, .LC0@toc@ha -; CHECK-P9: ld r3, .LC0@toc@l(r3) -; CHECK-P9: lxvx vs0, 0, r3 -; CHECK-P9: addis r3, r2, .LC1@toc@ha -; CHECK-P9: ld r3, .LC1@toc@l(r3) -; CHECK-P9: lfd f1, 0(r3) -; CHECK-P9: addis r3, r2, 
.LC2@toc@ha -; CHECK-P9: ld r3, .LC2@toc@l(r3) -; CHECK-P9: xxswapd vs1, f1 -; CHECK-P9: xxpermdi vs0, vs0, vs1, 1 -; CHECK-P9: stxvx vs0, 0, r3 -; CHECK-P9: blr +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-P9-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-P9-NEXT: lxvx vs0, 0, r3 +; CHECK-P9-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-P9-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-P9-NEXT: lfd f1, 0(r3) +; CHECK-P9-NEXT: addis r3, r2, .LC2@toc@ha +; CHECK-P9-NEXT: ld r3, .LC2@toc@l(r3) +; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1 +; CHECK-P9-NEXT: stxvx vs0, 0, r3 +; CHECK-P9-NEXT: blr +; +; CHECK-P9-NOVECTOR-LABEL: bar0: +; CHECK-P9-NOVECTOR: # %bb.0: # %entry +; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-P9-NOVECTOR-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-P9-NOVECTOR-NEXT: lfdx f1, 0, r3 +; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC2@toc@ha +; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC2@toc@l(r3) +; CHECK-P9-NOVECTOR-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NOVECTOR-NEXT: xxmrgld vs0, vs1, vs0 +; CHECK-P9-NOVECTOR-NEXT: stxvd2x vs0, 0, r3 +; CHECK-P9-NOVECTOR-NEXT: blr entry: %0 = load <2 x double>, <2 x double>* @x, align 16 %1 = load double, double* @y, align 8 @@ -66,41 +74,48 @@ entry: define void @bar1() { ; CHECK-LABEL: bar1: -; CHECK: # %bb.0: # %entry -; CHECK: addis r3, r2, .LC0@toc@ha -; CHECK: addis r4, r2, .LC1@toc@ha -; CHECK: ld r3, .LC0@toc@l(r3) -; CHECK: addis r3, r2, .LC2@toc@ha -; CHECK: ld r3, .LC2@toc@l(r3) -; CHECK: xxpermdi vs0, vs1, vs0, 1 -; CHECK: stxvd2x vs0, 0, r3 -; CHECK: blr -; -; CHECK-P9-NOVECTOR-LABEL: bar1: -; CHECK-P9-NOVECTOR: # %bb.0: # %entry -; CHECK-P9-NOVECTOR: addis r3, r2, .LC0@toc@ha -; CHECK-P9-NOVECTOR: ld r3, .LC0@toc@l(r3) -; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha -; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha -; CHECK-P9-NOVECTOR: ld r3, 
.LC2@toc@l(r3) -; CHECK-P9-NOVECTOR: xxpermdi vs0, vs0, vs1, 1 -; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3 -; CHECK-P9-NOVECTOR: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: addis r4, r2, .LC1@toc@ha +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: lfdx f0, 0, r3 +; CHECK-NEXT: ld r3, .LC1@toc@l(r4) +; CHECK-NEXT: lxvd2x vs1, 0, r3 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: addis r3, r2, .LC2@toc@ha +; CHECK-NEXT: ld r3, .LC2@toc@l(r3) +; CHECK-NEXT: xxpermdi vs0, vs1, vs0, 1 +; CHECK-NEXT: stxvd2x vs0, 0, r3 +; CHECK-NEXT: blr ; ; CHECK-P9-LABEL: bar1: -; CHECK-P9: # %bb.0: # %entry -; CHECK-P9: addis r3, r2, .LC0@toc@ha -; CHECK-P9: ld r3, .LC0@toc@l(r3) -; CHECK-P9: lxvx vs0, 0, r3 -; CHECK-P9: addis r3, r2, .LC1@toc@ha -; CHECK-P9: ld r3, .LC1@toc@l(r3) -; CHECK-P9: lfd f1, 0(r3) -; CHECK-P9: addis r3, r2, .LC2@toc@ha -; CHECK-P9: ld r3, .LC2@toc@l(r3) -; CHECK-P9: xxswapd vs1, f1 -; CHECK-P9: xxmrgld vs0, vs1, vs0 -; CHECK-P9: stxvx vs0, 0, r3 -; CHECK-P9: blr +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-P9-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-P9-NEXT: lxvx vs0, 0, r3 +; CHECK-P9-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-P9-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-P9-NEXT: lfd f1, 0(r3) +; CHECK-P9-NEXT: addis r3, r2, .LC2@toc@ha +; CHECK-P9-NEXT: ld r3, .LC2@toc@l(r3) +; CHECK-P9-NEXT: xxpermdi vs0, vs1, vs0, 1 +; CHECK-P9-NEXT: stxvx vs0, 0, r3 +; CHECK-P9-NEXT: blr +; +; CHECK-P9-NOVECTOR-LABEL: bar1: +; CHECK-P9-NOVECTOR: # %bb.0: # %entry +; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-P9-NOVECTOR-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-P9-NOVECTOR-NEXT: lfdx f1, 0, r3 +; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC2@toc@ha +; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC2@toc@l(r3) +; CHECK-P9-NOVECTOR-NEXT: xxswapd vs1, vs1 +; 
CHECK-P9-NOVECTOR-NEXT: xxpermdi vs0, vs0, vs1, 1 +; CHECK-P9-NOVECTOR-NEXT: stxvd2x vs0, 0, r3 +; CHECK-P9-NOVECTOR-NEXT: blr entry: %0 = load <2 x double>, <2 x double>* @x, align 16 %1 = load double, double* @y, align 8 diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll index a198604..331d786 100644 --- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -3,6 +3,10 @@ ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ ; RUN: | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-P8-BE + ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \ ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ ; RUN: | FileCheck --check-prefix=CHECK-P9-VECTOR %s @@ -20,6 +24,13 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) { ; CHECK-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-NEXT: blr ; +; CHECK-P8-BE-LABEL: testi0: +; CHECK-P8-BE: # %bb.0: +; CHECK-P8-BE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-BE-NEXT: lfdx f1, 0, r4 +; CHECK-P8-BE-NEXT: xxpermdi v2, vs1, vs0, 1 +; CHECK-P8-BE-NEXT: blr +; ; CHECK-P9-VECTOR-LABEL: testi0: ; CHECK-P9-VECTOR: # %bb.0: ; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 @@ -30,10 +41,9 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) { ; ; CHECK-P9-LABEL: testi0: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lfd f1, 0(r4) ; CHECK-P9-NEXT: lxv vs0, 0(r3) -; CHECK-P9-NEXT: xxswapd vs1, f1 -; CHECK-P9-NEXT: xxpermdi v2, vs0, vs1, 1 +; CHECK-P9-NEXT: lfd f1, 0(r4) +; CHECK-P9-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %s = load double, double* %p2 @@ -52,6 +62,13 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { ; CHECK-NEXT: 
xxpermdi v2, vs1, vs0, 1 ; CHECK-NEXT: blr ; +; CHECK-P8-BE-LABEL: testi1: +; CHECK-P8-BE: # %bb.0: +; CHECK-P8-BE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-BE-NEXT: lfdx f1, 0, r4 +; CHECK-P8-BE-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-P8-BE-NEXT: blr +; ; CHECK-P9-VECTOR-LABEL: testi1: ; CHECK-P9-VECTOR: # %bb.0: ; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 @@ -62,10 +79,9 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { ; ; CHECK-P9-LABEL: testi1: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lfd f1, 0(r4) ; CHECK-P9-NEXT: lxv vs0, 0(r3) -; CHECK-P9-NEXT: xxswapd vs1, f1 -; CHECK-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P9-NEXT: lfd f1, 0(r4) +; CHECK-P9-NEXT: xxpermdi v2, vs1, vs0, 1 ; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %s = load double, double* %p2 @@ -82,6 +98,11 @@ define double @teste0(<2 x double>* %p1) { ; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; CHECK-NEXT: blr ; +; CHECK-P8-BE-LABEL: teste0: +; CHECK-P8-BE: # %bb.0: +; CHECK-P8-BE-NEXT: lfdx f1, 0, r3 +; CHECK-P8-BE-NEXT: blr +; ; CHECK-P9-VECTOR-LABEL: teste0: ; CHECK-P9-VECTOR: # %bb.0: ; CHECK-P9-VECTOR-NEXT: lxvd2x vs1, 0, r3 @@ -107,6 +128,11 @@ define double @teste1(<2 x double>* %p1) { ; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; CHECK-NEXT: blr ; +; CHECK-P8-BE-LABEL: teste1: +; CHECK-P8-BE: # %bb.0: +; CHECK-P8-BE-NEXT: lfd f1, 8(r3) +; CHECK-P8-BE-NEXT: blr +; ; CHECK-P9-VECTOR-LABEL: teste1: ; CHECK-P9-VECTOR: # %bb.0: ; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 -- 2.7.4