From 5b652f77e02f4aaa4872daf134b56f5d7388f402 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Thu, 8 Sep 2022 14:13:17 +0100
Subject: [PATCH] [AMDGPU] Add basic tests for emitting v_fma_f16 and friends

---
 llvm/test/CodeGen/AMDGPU/fma.f16.ll | 102 ++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fma.f16.ll

diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
new file mode 100644
index 0000000..20d39ef
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-SDAG
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
+
+declare half @llvm.fma.f16(half, half, half)
+declare half @llvm.maxnum.f16(half, half) ; NOTE(review): no test in this patch calls maxnum - confirm whether this declaration is needed
+
+define half @test_fma(half %x, half %y, half %z) {
+; GFX9-LABEL: test_fma:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_fma:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+  %r = call half @llvm.fma.f16(half %x, half %y, half %z) ; baseline fma(x, y, z): the gfx900 and gfx1010 checks both expect v_fma_f16 v0, v0, v1, v2
+  ret half %r
+}
+
+; GFX10+ has v_fmac_f16. The next test uses %x as the addend; its gfx1010 checks expect v_fmac_f16_e32, its gfx900 checks expect v_fma_f16.
+define half @test_fmac(half %x, half %y, half %z) {
+; GFX9-LABEL: test_fmac:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f16 v0, v1, v2, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_fmac:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_fmac_f16_e32 v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+  %r = call half @llvm.fma.f16(half %y, half %z, half %x) ; %x is the addend: gfx900 checks pass v0 as the last v_fma_f16 source, gfx1010 checks expect v_fmac_f16_e32 with only v1, v2 as sources and v0 as destination
+  ret half %r
+}
+
+; GFX10+ has v_fmaak_f16 (literal addend). Per the checks below only the gfx1010 -global-isel=0 run selects it; the gfx1010 -global-isel=1 run expects v_fma_f16 with a literal operand, and both gfx900 runs first move 0x4200 into a register.
+define half @test_fmaak(half %x, half %y, half %z) {
+; GFX9-SDAG-LABEL: test_fmaak:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x4200
+; GFX9-SDAG-NEXT: v_fma_f16 v0, v0, v1, s4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: test_fmaak:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4200
+; GFX9-GISEL-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: test_fmaak:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-SDAG-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: test_fmaak:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT: v_fma_f16 v0, v0, v1, 0x4200
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+  %r = call half @llvm.fma.f16(half %x, half %y, half 0xH4200) ; 0xH4200 is 3.0 as an IEEE half, used as the addend; %z is unused in this test
+  ret half %r
+}
+
+; GFX10+ has v_fmamk_f16 (literal multiplicand). In the next test both gfx1010 runs share one check block expecting it; the gfx900 runs move 0x4200 into a register (s4 for -global-isel=0, v1 for -global-isel=1) and use v_fma_f16.
+define half @test_fmamk(half %x, half %y, half %z) {
+; GFX9-SDAG-LABEL: test_fmamk:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x4200
+; GFX9-SDAG-NEXT: v_fma_f16 v0, v0, s4, v2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: test_fmamk:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x4200
+; GFX9-GISEL-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_fmamk:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+  %r = call half @llvm.fma.f16(half %x, half 0xH4200, half %z) ; 0xH4200 (3.0 as an IEEE half) is the second multiplicand. NOTE(review): %y is unused - presumably kept so %z still arrives in v2 as the checks expect; confirm
+  ret half %r
+}
-- 
2.7.4