From 86549c7528db91a6d2b856f626de299313743709 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Wed, 23 Oct 2019 14:22:16 +0100
Subject: [PATCH] [SelectionDAG] Add support for FP_ROUND in WidenVectorOperand.

Summary:
This is used on AMDGPU for rounding from v3f64 (which is illegal) to
v3f32 (which is legal).

Subscribers: jvesely, nhaehnle, tpr, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69339
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 18 ++++++++++++++----
 llvm/test/CodeGen/AMDGPU/fptrunc.ll                   | 10 ++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 3763e88..6c47c5b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4161,6 +4161,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
 
   case ISD::FP_EXTEND:
   case ISD::STRICT_FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::STRICT_FP_ROUND:
   case ISD::FP_TO_SINT:
   case ISD::STRICT_FP_TO_SINT:
   case ISD::FP_TO_UINT:
@@ -4297,13 +4299,21 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
     SDValue Res;
     if (N->isStrictFPOpcode()) {
-      Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
-                        { N->getOperand(0), InOp });
+      if (Opcode == ISD::STRICT_FP_ROUND)
+        Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+                          { N->getOperand(0), InOp, N->getOperand(2) });
+      else
+        Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+                          { N->getOperand(0), InOp });
       // Legalize the chain result - switch anything that used the old chain to
       // use the new one.
       ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
-    } else
-      Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+    } else {
+      if (Opcode == ISD::FP_ROUND)
+        Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1));
+      else
+        Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+    }
     return DAG.getNode(
         ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
         DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
index d9c5b7e..80f0a37 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
@@ -30,6 +30,16 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(<2 x float> addrspace(1)* %out
   ret void
 }
 
+; FUNC-LABEL: {{^}}fptrunc_v3f64_to_v3f32:
+; GCN: v_cvt_f32_f64_e32
+; GCN: v_cvt_f32_f64_e32
+; GCN: v_cvt_f32_f64_e32
+define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(<3 x float> addrspace(1)* %out, <3 x double> %in) {
+  %result = fptrunc <3 x double> %in to <3 x float>
+  store <3 x float> %result, <3 x float> addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}fptrunc_v4f64_to_v4f32:
 ; GCN: v_cvt_f32_f64_e32
 ; GCN: v_cvt_f32_f64_e32
-- 
2.7.4