case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
- case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
- case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG);
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
}
-SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerFP_TO_INT(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
+ unsigned OpOpcode = Op.getOpcode();
+ EVT SrcVT = Src.getValueType();
+ EVT DestVT = Op.getValueType();
- // TODO: Factor out code common with LowerFP_TO_UINT.
+ // Will be selected natively
+ if (SrcVT == MVT::f16 && DestVT == MVT::i16)
+ return Op;
- EVT SrcVT = Src.getValueType();
- if (SrcVT == MVT::f16 ||
- (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) {
+ // Promote i16 to i32
+ if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
SDLoc DL(Op);
- SDValue FpToInt32 = DAG.getNode(Op.getOpcode(), DL, MVT::i32, Src);
- return DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, FpToInt32);
+ SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToInt32);
}
- if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
- return LowerFP64_TO_INT(Op, DAG, true);
-
- return SDValue();
-}
-
-SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue Src = Op.getOperand(0);
-
- // TODO: Factor out code common with LowerFP_TO_SINT.
-
- EVT SrcVT = Src.getValueType();
if (SrcVT == MVT::f16 ||
(SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) {
SDLoc DL(Op);
- SDValue FpToUInt32 = DAG.getNode(Op.getOpcode(), DL, MVT::i32, Src);
- return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, FpToUInt32);
+ SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);
+ unsigned Ext =
+ OpOpcode == ISD::FP_TO_SINT ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(Ext, DL, MVT::i64, FpToInt32);
}
- if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
- return LowerFP64_TO_INT(Op, DAG, false);
+ if (DestVT == MVT::i64 && SrcVT == MVT::f64)
+ return LowerFP64_TO_INT(Op, DAG, OpOpcode == ISD::FP_TO_SINT);
return SDValue();
}
SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
setOperationAction(ISD::FP_TO_FP16, MVT::i16, Promote);
AddPromotedToType(ISD::FP_TO_FP16, MVT::i16, MVT::i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Custom);
// F16 - Constant Actions.
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
return lowerFMINNUM_FMAXNUM(Op, DAG);
case ISD::FMA:
return splitTernaryVectorOp(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=GCN,FUNC,SI
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=GCN,FUNC,VI
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=GCN,FUNC
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=GCN,FUNC
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=EG -check-prefix=FUNC
declare float @llvm.fabs.f32(float) #1
}
; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i16:
-; The reason different instructions are used on SI and VI is because for
-; SI fp_to_uint is legalized by the type legalizer and for VI it is
-; legalized by the dag legalizer and they legalize fp_to_uint differently.
-; SI: v_cvt_u32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
-; VI: v_cvt_i32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
+; GCN: v_cvt_u32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; GCN: buffer_store_short [[VAL]]
define amdgpu_kernel void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float %in) #0 {
%uint = fptoui float %in to i16
; GCN-LABEL: {{^}}fptosi_f16_to_i16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
-; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
-; GCN: v_cvt_i32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]]
+; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
+; SI: v_cvt_i32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]]
+; VI: v_cvt_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
; GCN: buffer_store_short v[[R_I16]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_f16_to_i16(
; SI: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]]
; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_LO]], v[[R_I16_HI]]
-; VI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; VI: v_cvt_f32_f16_sdwa v[[A_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]]
-; VI: v_cvt_i32_f32_sdwa v[[R_I16_1:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI: v_cvt_i16_f16_e32 v[[A_I16_0:[0-9]+]], v[[A_V2_F16]]
+; VI: v_cvt_i16_f16_sdwa v[[A_I16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[A_I16_0]], v[[A_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN: buffer_store_dword v[[R_V2_I16]]
; GCN: s_endpgm
; GCN-LABEL: {{^}}fptoui_f16_to_i16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
-; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
-; SI: v_cvt_u32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]]
-; VI: v_cvt_i32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]]
+; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
+; SI: v_cvt_u32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]]
+; VI: v_cvt_u16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
; GCN: buffer_store_short v[[R_I16]]
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_f16_to_i16(
; SI: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]]
; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_HI]]
-; VI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_V2_F16]]
-; VI-DAG: v_cvt_f32_f16_sdwa v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]]
-; VI: v_cvt_i32_f32_sdwa v[[R_I16_0:[0-9]+]], v[[A_F32_0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI: v_cvt_u16_f16_e32 v[[A_U16_1:[0-9]+]], v[[A_V2_F16]]
+; VI: v_cvt_u16_f16_sdwa v[[R_U16_0:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[A_U16_1]], v[[R_U16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN: buffer_store_dword v[[R_V2_I16]]
; GCN: s_endpgm