From 9d49c449ec0750645de3165a01e0f9667fff2130 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 18 Sep 2018 01:51:33 +0000 Subject: [PATCH] AMDGPU: Expand vector canonicalizes llvm-svn: 342439 --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 + llvm/test/CodeGen/AMDGPU/fcanonicalize.ll | 76 ++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 08c3b75..3b0026a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -471,6 +471,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, VT, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); setOperationAction(ISD::SETCC, VT, Expand); + setOperationAction(ISD::FCANONICALIZE, VT, Expand); } // This causes using an unrolled select operation rather than expansion with diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll index 6b2d58db..72870c5 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -2,12 +2,18 @@ declare float @llvm.fabs.f32(float) #0 declare float @llvm.canonicalize.f32(float) #0 +declare <2 x float> @llvm.canonicalize.v2f32(<2 x float>) #0 +declare <3 x float> @llvm.canonicalize.v3f32(<3 x float>) #0 +declare <4 x float> @llvm.canonicalize.v4f32(<4 x float>) #0 +declare <8 x float> @llvm.canonicalize.v8f32(<8 x float>) #0 declare double @llvm.fabs.f64(double) #0 declare double @llvm.canonicalize.f64(double) #0 +declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0 +declare <3 x double> @llvm.canonicalize.v3f64(<3 x double>) #0 +declare <4 x double> @llvm.canonicalize.v4f64(<4 x double>) #0 declare half @llvm.canonicalize.f16(half) #0 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0 declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0 ; GCN-LABEL: {{^}}v_test_canonicalize_var_f32: ; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} @@ -550,6 +556,74 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f64(<2 x double> addrspace( ret void } +; GCN-LABEL: {{^}}v_test_canonicalize_v2f32: +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +define <2 x float> @v_test_canonicalize_v2f32(<2 x float> %arg) #1 { + %canon = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %arg) + ret <2 x float> %canon +} + +; GCN-LABEL: {{^}}v_test_canonicalize_v3f32: +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +define <3 x float> @v_test_canonicalize_v3f32(<3 x float> %arg) #1 { + %canon = call <3 x float> @llvm.canonicalize.v3f32(<3 x float> %arg) + ret <3 x float> %canon +} + +; GCN-LABEL: {{^}}v_test_canonicalize_v4f32: +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +define <4 x float> @v_test_canonicalize_v4f32(<4 x float> %arg) #1 { + %canon = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %arg) + ret <4 x float> %canon +} + +; GCN-LABEL: {{^}}v_test_canonicalize_v8f32: +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} +define <8 x float> @v_test_canonicalize_v8f32(<8 x float> %arg) #1 { + %canon = call <8 x float> @llvm.canonicalize.v8f32(<8 x float> %arg) + ret <8 x float> %canon +} + +; GCN-LABEL: {{^}}v_test_canonicalize_v2f64: +; GCN: v_max_f64 +; GCN: v_max_f64 +define <2 x double> @v_test_canonicalize_v2f64(<2 x double> %arg) #1 { + %canon = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %arg) + ret <2 x double> %canon +} + +; GCN-LABEL: {{^}}v_test_canonicalize_v3f64: +; GCN: v_max_f64 +; GCN: v_max_f64 +; GCN: v_max_f64 +define <3 x double> @v_test_canonicalize_v3f64(<3 x double> %arg) #1 { + %canon = call <3 x double> @llvm.canonicalize.v3f64(<3 x double> %arg) + ret <3 x double> %canon +} + +; GCN-LABEL: {{^}}v_test_canonicalize_v4f64: +; GCN: v_max_f64 +; GCN: v_max_f64 +; GCN: v_max_f64 +; GCN: v_max_f64 +define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 { + %canon = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %arg) + ret <4 x double> %canon +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" } -- 2.7.4