DAG: Fix vector widening fcanonicalize
author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 2 Aug 2018 13:43:53 +0000 (13:43 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 2 Aug 2018 13:43:53 +0000 (13:43 +0000)
llvm-svn: 338715

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll

index a7ccb3c..ab868c3 100644 (file)
@@ -2394,6 +2394,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FSIN:
   case ISD::FSQRT:
   case ISD::FTRUNC:
+  case ISD::FCANONICALIZE:
     Res = WidenVecRes_Unary(N);
     break;
   case ISD::FMA:
index 440904f..a3022d1 100644 (file)
@@ -6,6 +6,7 @@ declare half @llvm.fabs.f16(half) #0
 declare half @llvm.canonicalize.f16(half) #0
 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
+declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) #0
 declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
@@ -477,6 +478,25 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> a
   ret void
 }
 
+; FIXME: Extra 4th component handled
+; GCN-LABEL: {{^}}v_test_canonicalize_var_v3f16:
+; GFX9: s_waitcnt
+; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT: s_setpc_b64
+
+; VI-DAG: v_max_f16_sdwa [[CANON_ELT3:v[0-9]+]], v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_max_f16_e32 [[CANON_ELT2:v[0-9]+]], v1, v1
+; VI-DAG: v_max_f16_sdwa [[CANON_ELT1:v[0-9]+]], v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_max_f16_e32 [[CANON_ELT0:v[0-9]+]], v0, v0
+; VI-DAG: v_or_b32_e32 v0, [[CANON_ELT0]], [[CANON_ELT1]]
+; VI-DAG: v_or_b32_e32 v1, [[CANON_ELT2]], [[CANON_ELT3]]
+; VI: s_setpc_b64
+define <3 x half> @v_test_canonicalize_var_v3f16(<3 x half> %val) #1 {
+  %canonicalized = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> %val)
+  ret <3 x half> %canonicalized
+}
+
 ; GCN-LABEL: {{^}}v_test_canonicalize_var_v4f16:
 ; GFX9: s_waitcnt
 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v0