From 11d844f96dfb6cf6105cdfa8ebfe518f77bb988a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 29 Oct 2022 11:32:04 -0700 Subject: [PATCH] AMDGPU/GlobalISel: Add missing run line for gfx7 packed operations We were only testing targets with legal 16-bit operations. --- llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll | 161 +++++++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll index bcfdd70..e7e5f77 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll @@ -1,10 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) { +; GFX7-LABEL: v_add_v2i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_add_v2i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -30,6 +38,18 @@ define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) { } define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) { +; GFX7-LABEL: v_add_v2i16_fneg_lhs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_add_v2i16_fneg_lhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -58,6 +78,18 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) { } define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) { +; GFX7-LABEL: v_add_v2i16_fneg_rhs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX7-NEXT: v_or_b32_e32 v2, v3, v2 +; GFX7-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_add_v2i16_fneg_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -86,6 +118,23 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) { } define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v3 +; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GFX7-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -117,6 +166,14 @@ define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) { } define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) { +; GFX7-LABEL: v_add_v2i16_neg_inline_imm_splat: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_movk_i32 s4, 0xffc0 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, s4, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -144,6 +201,13 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) { } define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) { +; GFX7-LABEL: v_add_v2i16_neg_inline_imm_lo: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0xffffffc0, v0 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, 4, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_lo: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -171,6 +235,13 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) { } define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) { +; GFX7-LABEL: v_add_v2i16_neg_inline_imm_hi: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, 4, v0 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, 0xffffffc0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_hi: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -198,6 +269,16 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) { } define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) { +; GFX7-LABEL: s_add_v2i16_neg_inline_imm_splat: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_sub_i32 s1, s1, 64 +; GFX7-NEXT: s_sub_i32 s0, s0, 64 +; GFX7-NEXT: s_and_b32 s1, s1, 0xffff +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_splat: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshr_b32 s1, s0, 16 @@ -230,6 +311,16 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) { } define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) { +; GFX7-LABEL: s_add_v2i16_neg_inline_imm_lo: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_add_i32 s1, s1, 4 +; GFX7-NEXT: s_sub_i32 s0, s0, 64 +; GFX7-NEXT: s_and_b32 s1, s1, 0xffff +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_lo: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshr_b32 s1, s0, 16 @@ -262,6 +353,16 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) { } define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) { +; GFX7-LABEL: s_add_v2i16_neg_inline_imm_hi: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_sub_i32 s1, s1, 64 +; GFX7-NEXT: s_add_i32 s0, s0, 4 +; GFX7-NEXT: s_and_b32 s1, s1, 0xffff +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_hi: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshr_b32 s1, s0, 16 @@ -294,6 +395,16 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) { } define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) { +; GFX7-LABEL: s_add_v2i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_add_i32 s1, s1, s3 +; GFX7-NEXT: s_add_i32 s0, s0, s2 +; GFX7-NEXT: s_and_b32 s1, s1, 0xffff +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_add_v2i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshr_b32 s2, s0, 16 @@ -330,6 +441,21 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) { } define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) { +; GFX7-LABEL: s_add_v2i16_fneg_lhs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_or_b32 s0, s1, s0 +; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000 +; GFX7-NEXT: s_lshr_b32 s1, s0, 16 +; GFX7-NEXT: s_add_i32 s1, s1, s3 +; GFX7-NEXT: s_add_i32 s0, s0, s2 +; GFX7-NEXT: s_and_b32 s1, s1, 0xffff +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_add_v2i16_fneg_lhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_xor_b32 s0, s0, 0x80008000 @@ -371,6 +497,21 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg } define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) { +; GFX7-LABEL: s_add_v2i16_fneg_rhs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_lshl_b32 s3, s3, 16 +; GFX7-NEXT: s_and_b32 s2, s2, 0xffff +; GFX7-NEXT: s_or_b32 s2, s3, s2 +; GFX7-NEXT: s_xor_b32 s2, s2, 0x80008000 +; GFX7-NEXT: s_lshr_b32 s3, s2, 16 +; GFX7-NEXT: s_add_i32 s1, s1, s3 +; GFX7-NEXT: s_add_i32 s0, s0, s2 +; GFX7-NEXT: s_and_b32 s1, s1, 0xffff +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_add_v2i16_fneg_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_xor_b32 s1, s1, 0x80008000 @@ -412,6 +553,26 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg } define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) { +; GFX7-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_or_b32 s0, s1, s0 +; GFX7-NEXT: s_lshl_b32 s1, s3, 16 +; GFX7-NEXT: s_and_b32 s2, s2, 0xffff +; GFX7-NEXT: s_or_b32 s1, s1, s2 +; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000 +; GFX7-NEXT: s_xor_b32 s1, s1, 0x80008000 +; GFX7-NEXT: s_lshr_b32 s2, s0, 16 +; GFX7-NEXT: s_lshr_b32 s3, s1, 16 +; GFX7-NEXT: s_add_i32 s2, s2, s3 +; GFX7-NEXT: s_add_i32 s0, s0, s1 +; GFX7-NEXT: s_and_b32 s1, s2, 0xffff +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_xor_b32 s0, s0, 0x80008000 -- 2.7.4