From abfd10807ca665fe0dc5e24bcff09fb5db0a12ec Mon Sep 17 00:00:00 2001 From: Abderrazek Zaafrani Date: Thu, 28 Feb 2019 20:21:46 +0000 Subject: [PATCH] [AArch64] Improve FP16 vector convert from short instructions. https://reviews.llvm.org/D58563 llvm-svn: 355134 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 21 ++++++++++----- llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll | 31 +++++++++++++---------- llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll | 30 ++++++++++++---------- 3 files changed, 48 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2a260dd..56c562a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -660,14 +660,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // elements smaller than i32, so promote the input to i32 first. setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32); setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); - setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32); - setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32); - // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16 - // -> v8f16 conversions. + // i8 vector elements also need promotion to i32 for v8i8 -> v8f16. setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32); setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32); - setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32); - setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32); // Similarly, there is no direct i32 -> f64 vector conversion instruction. 
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); @@ -678,6 +673,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); + if (Subtarget->hasFullFP16()) { + setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); + } else { + // When AArch64 doesn't have fullfp16 support, promote the input + // to i32 first. + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32); + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32); + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32); + } + setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll index 67fd5b2..20f1c4a 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -156,21 +156,22 @@ define <4 x half> @sitofp_i8(<4 x i8> %a) #0 { ; CHECK-COMMON-LABEL: sitofp_i8: ; CHECK-COMMON-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8 ; CHECK-COMMON-NEXT: sshr [[OP2:v[0-9]+\.4h]], [[OP1]], #8 -; CHECK-COMMON-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0 -; CHECK-COMMON-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]] -; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP4]] +; CHECK-FP16-NEXT: scvtf v0.4h, [[OP2]] +; CHECK-CVT-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0 +; CHECK-CVT-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]] +; CHECK-CVT-NEXT: fcvtn v0.4h, [[OP4]] ; CHECK-COMMON-NEXT: ret %1 = sitofp <4 x i8> %a to <4 x half> ret <4 x half> %1 } - define 
<4 x half> @sitofp_i16(<4 x i16> %a) #0 { ; CHECK-COMMON-LABEL: sitofp_i16: -; CHECK-COMMON-NEXT: sshll [[OP1:v[0-9]+\.4s]], v0.4h, #0 -; CHECK-COMMON-NEXT: scvtf [[OP2:v[0-9]+\.4s]], [[OP1]] -; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]] -; CHECK-COMMON-NEXT: ret +; CHECK-FP16-NEXT: scvtf v0.4h, v0.4h +; CHECK-CVT-NEXT: sshll [[OP1:v[0-9]+\.4s]], v0.4h, #0 +; CHECK-CVT-NEXT: scvtf [[OP2:v[0-9]+\.4s]], [[OP1]] +; CHECK-CVT-NEXT: fcvtn v0.4h, [[OP2]] +; CHECK-COMMON-NEXT: ret %1 = sitofp <4 x i16> %a to <4 x half> ret <4 x half> %1 } @@ -201,9 +202,10 @@ define <4 x half> @sitofp_i64(<4 x i64> %a) #0 { define <4 x half> @uitofp_i8(<4 x i8> %a) #0 { ; CHECK-COMMON-LABEL: uitofp_i8: ; CHECK-COMMON-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-COMMON-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0 -; CHECK-COMMON-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]] -; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]] +; CHECK-FP16-NEXT: ucvtf v0.4h, v0.4h +; CHECK-CVT-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0 +; CHECK-CVT-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]] +; CHECK-CVT-NEXT: fcvtn v0.4h, [[OP2]] ; CHECK-COMMON-NEXT: ret %1 = uitofp <4 x i8> %a to <4 x half> ret <4 x half> %1 @@ -212,9 +214,10 @@ define <4 x half> @uitofp_i8(<4 x i8> %a) #0 { define <4 x half> @uitofp_i16(<4 x i16> %a) #0 { ; CHECK-COMMON-LABEL: uitofp_i16: -; CHECK-COMMON-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0 -; CHECK-COMMON-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]] -; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]] +; CHECK-FP16-NEXT: ucvtf v0.4h, v0.4h +; CHECK-CVT-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0 +; CHECK-CVT-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]] +; CHECK-CVT-NEXT: fcvtn v0.4h, [[OP2]] ; CHECK-COMMON-NEXT: ret %1 = uitofp <4 x i16> %a to <4 x half> ret <4 x half> %1 diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll index 9cd0f1e..7252bb1 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ 
-295,13 +295,14 @@ define <8 x half> @sitofp_i8(<8 x i8> %a) #0 { define <8 x half> @sitofp_i16(<8 x i16> %a) #0 { ; CHECK-LABEL: sitofp_i16: -; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v0.8h, #0 -; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v0.4h, #0 -; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]] -; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]] -; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]] -; CHECK-DAG: fcvtn v0.4h, [[HIF]] -; CHECK: mov v0.d[1], v[[LOREG]].d[0] +; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h +; CHECK-CVT-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v0.8h, #0 +; CHECK-CVT-NEXT: sshll [[HI:v[0-9]+\.4s]], v0.4h, #0 +; CHECK-CVT-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]] +; CHECK-CVT-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]] +; CHECK-CVT-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]] +; CHECK-CVT-DAG: fcvtn v0.4h, [[HIF]] +; CHECK-CVT-NEXT: mov v0.d[1], v[[LOREG]].d[0] %1 = sitofp <8 x i16> %a to <8 x half> ret <8 x half> %1 } @@ -347,13 +348,14 @@ define <8 x half> @uitofp_i8(<8 x i8> %a) #0 { define <8 x half> @uitofp_i16(<8 x i16> %a) #0 { ; CHECK-LABEL: uitofp_i16: -; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v0.8h, #0 -; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v0.4h, #0 -; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]] -; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]] -; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]] -; CHECK-DAG: fcvtn v0.4h, [[HIF]] -; CHECK: mov v0.d[1], v[[LOREG]].d[0] +; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h +; CHECK-CVT-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v0.8h, #0 +; CHECK-CVT-NEXT: ushll [[HI:v[0-9]+\.4s]], v0.4h, #0 +; CHECK-CVT-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]] +; CHECK-CVT-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]] +; CHECK-CVT-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]] +; CHECK-CVT-DAG: fcvtn v0.4h, [[HIF]] +; CHECK-CVT-NEXT: mov v0.d[1], v[[LOREG]].d[0] %1 = uitofp <8 x i16> %a to <8 x half> ret <8 x half> %1 } -- 2.7.4