From 6d1596a98c47d026d855fd882ea5708b232d9a66 Mon Sep 17 00:00:00 2001
From: Lei Huang
Date: Mon, 19 Mar 2018 18:52:20 +0000
Subject: [PATCH] [PowerPC][Power9]Legalize and emit code for quad-precision
 add/div/mul/sub

Legalize and emit code for quad-precision floating point operations:

  * xsaddqp
  * xssubqp
  * xsdivqp
  * xsmulqp

Differential Revision: https://reviews.llvm.org/D44506

llvm-svn: 327878
---
Note: the FileCheck directives in f128-arith.ll are written with the required
trailing colon (CHECK:, CHECK-NOT:, CHECK-NEXT:); without it FileCheck does not
recognize them and only the CHECK-LABEL lines would be verified.

 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  7 +++
 llvm/lib/Target/PowerPC/PPCInstrVSX.td      | 25 ++++++++--
 llvm/test/CodeGen/PowerPC/f128-arith.ll     | 73 +++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/f128-arith.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 040a2cb..82f0334 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -785,6 +785,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
+
+      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
+      setOperationAction(ISD::FADD, MVT::f128, Legal);
+      setOperationAction(ISD::FSUB, MVT::f128, Legal);
+      setOperationAction(ISD::FDIV, MVT::f128, Legal);
+      setOperationAction(ISD::FMUL, MVT::f128, Legal);
+
     }
 
     if (Subtarget.hasP9Altivec()) {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 7bc022f..95191aa 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2397,14 +2397,21 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 
   // Quad-Precision Scalar Floating-Point Arithmetic Instructions:
   // Add/Divide/Multiply/Subtract
-  def XSADDQP   : X_VT5_VA5_VB5   <63,   4, "xsaddqp" , []>;
+  let isCommutable = 1 in {
+  def XSADDQP   : X_VT5_VA5_VB5   <63,   4, "xsaddqp",
+                                   [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>;
   def XSADDQPO  : X_VT5_VA5_VB5_Ro<63,   4, "xsaddqpo", []>;
-  def XSDIVQP   : X_VT5_VA5_VB5   <63, 548, "xsdivqp" , []>;
-  def XSDIVQPO  : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", []>;
-  def XSMULQP   : X_VT5_VA5_VB5   <63,  36, "xsmulqp" , []>;
+  def XSMULQP   : X_VT5_VA5_VB5   <63,  36, "xsmulqp",
+                                   [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
   def XSMULQPO  : X_VT5_VA5_VB5_Ro<63,  36, "xsmulqpo", []>;
-  def XSSUBQP   : X_VT5_VA5_VB5   <63, 516, "xssubqp" , []>;
+  }
+
+  def XSSUBQP   : X_VT5_VA5_VB5   <63, 516, "xssubqp" ,
+                                   [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
   def XSSUBQPO  : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo", []>;
+  def XSDIVQP   : X_VT5_VA5_VB5   <63, 548, "xsdivqp",
+                                   [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
+  def XSDIVQPO  : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", []>;
 
   // Square-Root
   def XSSQRTQP  : X_VT5_XO5_VB5   <63, 27, 804, "xssqrtqp" , []>;
@@ -2865,12 +2872,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
   def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
   def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(f128 (quadwOffsetLoad iqaddr:$src)),
+            (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
   def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
   def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
 
   def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
   def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
   def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore f128:$rS, iqaddr:$dst),
+            (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
   def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
   def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
             (STXV $rS, memrix16:$dst)>;
@@ -2884,6 +2895,10 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
   def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
   def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
+  def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)),
+            (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>;
+  def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst),
+            (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
   def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
             (STXVX $rS, xoaddr:$dst)>;
   def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll
new file mode 100644
index 0000000..540754d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -0,0 +1,73 @@
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; Function Attrs: norecurse nounwind
+define void @qpAdd(fp128* nocapture readonly %a, fp128* nocapture %res) {
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %add = fadd fp128 %0, %0
+  store fp128 %add, fp128* %res, align 16
+  ret void
+; CHECK-LABEL: qpAdd
+; CHECK-NOT: bl __addtf3
+; CHECK: xsaddqp
+; CHECK: stxv
+; CHECK: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @qpSub(fp128* nocapture readonly %a, fp128* nocapture %res) {
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %sub = fsub fp128 %0, %0
+  store fp128 %sub, fp128* %res, align 16
+  ret void
+; CHECK-LABEL: qpSub
+; CHECK-NOT: bl __subtf3
+; CHECK: xssubqp
+; CHECK: stxv
+; CHECK: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @qpMul(fp128* nocapture readonly %a, fp128* nocapture %res) {
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %mul = fmul fp128 %0, %0
+  store fp128 %mul, fp128* %res, align 16
+  ret void
+; CHECK-LABEL: qpMul
+; CHECK-NOT: bl __multf3
+; CHECK: xsmulqp
+; CHECK: stxv
+; CHECK: blr
+}
+
+; Function Attrs: norecurse nounwind
+define void @qpDiv(fp128* nocapture readonly %a, fp128* nocapture %res) {
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %div = fdiv fp128 %0, %0
+  store fp128 %div, fp128* %res, align 16
+  ret void
+; CHECK-LABEL: qpDiv
+; CHECK-NOT: bl __divtf3
+; CHECK: xsdivqp
+; CHECK: stxv
+; CHECK: blr
+}
+
+define void @testLdNSt(i8* nocapture readonly %PtrC, fp128* nocapture %PtrF) {
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %PtrC, i64 4
+  %0 = bitcast i8* %add.ptr to fp128*
+  %1 = load fp128, fp128* %0, align 16
+  %2 = bitcast fp128* %PtrF to i8*
+  %add.ptr1 = getelementptr inbounds i8, i8* %2, i64 8
+  %3 = bitcast i8* %add.ptr1 to fp128*
+  store fp128 %1, fp128* %3, align 16
+  ret void
+; CHECK-LABEL: testLdNSt
+; CHECK: lxvx
+; CHECK: stxvx
+; CHECK-NEXT: blr
+}
-- 
2.7.4