From 0a376d1034d3dfc54a3486d687bd56a0e00caa6b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Sat, 12 Nov 2022 09:57:07 -0800
Subject: [PATCH] AMDGPU: Add some tests for i1 sitofp/uitofp-like selects

---
 llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 157 ++++++++++++++++++++++++++++-
 llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 154 ++++++++++++++++++++++++++++
 2 files changed, 309 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index 7b820ab..f4ff6c1 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -225,6 +225,159 @@ define double @v_sint_to_fp_i8_to_f64(i8 %in) {
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %fp = sitofp i8 %in to double
   ret double %fp
+  }
+
+define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; CI-LABEL: s_select_sint_to_fp_i1_vals_f64:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT:    v_mov_b32_e32 v0, 0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_cmp_eq_u32 s2, 0
+; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
+; CI-NEXT:    v_mov_b32_e32 v3, s1
+; CI-NEXT:    v_mov_b32_e32 v1, s2
+; CI-NEXT:    v_mov_b32_e32 v2, s0
+; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_select_sint_to_fp_i1_vals_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT:    v_mov_b32_e32 v0, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_cmp_eq_u32 s2, 0
+; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double -1.0, double 0.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
+
+define void @v_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double -1.0, double 0.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
+
+define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+; CI-LABEL: s_select_sint_to_fp_i1_vals_i64:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT:    v_mov_b32_e32 v0, 0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_cmp_eq_u32 s2, 0
+; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
+; CI-NEXT:    v_mov_b32_e32 v3, s1
+; CI-NEXT:    v_mov_b32_e32 v1, s2
+; CI-NEXT:    v_mov_b32_e32 v2, s0
+; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_select_sint_to_fp_i1_vals_i64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT:    v_mov_b32_e32 v0, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_cmp_eq_u32 s2, 0
+; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
+  store i64 %select, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+define void @v_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
+  store i64 %select, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; TODO: This should swap the selected order / invert the compare and do it.
+define void @v_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double 0.0, double -1.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; TODO: This should swap the selected order / invert the compare and do it.
+define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; CI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT:    v_mov_b32_e32 v0, 0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_cmp_eq_u32 s2, 0
+; CI-NEXT:    s_cselect_b32 s2, 0, 0xbff00000
+; CI-NEXT:    v_mov_b32_e32 v3, s1
+; CI-NEXT:    v_mov_b32_e32 v1, s2
+; CI-NEXT:    v_mov_b32_e32 v2, s0
+; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT:    v_mov_b32_e32 v0, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_cmp_eq_u32 s2, 0
+; VI-NEXT:    s_cselect_b32 s2, 0, 0xbff00000
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double 0.0, double -1.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index d4286b8a..ddb9aef 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -396,3 +396,157 @@ define double @v_uint_to_fp_i8_to_f64(i8 %in) {
   %fp = uitofp i8 %in to double
   ret double %fp
 }
+
+define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; SI-LABEL: s_select_uint_to_fp_i1_vals_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT:    v_mov_b32_e32 v0, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_cmp_eq_u32 s2, 0
+; SI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT:    v_mov_b32_e32 v3, s1
+; SI-NEXT:    v_mov_b32_e32 v1, s2
+; SI-NEXT:    v_mov_b32_e32 v2, s0
+; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_select_uint_to_fp_i1_vals_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT:    v_mov_b32_e32 v0, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_cmp_eq_u32 s2, 0
+; VI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double 1.0, double 0.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
+
+define void @v_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_select_uint_to_fp_i1_vals_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x3ff00000
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double 1.0, double 0.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
+
+define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+; SI-LABEL: s_select_uint_to_fp_i1_vals_i64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT:    v_mov_b32_e32 v0, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_cmp_eq_u32 s2, 0
+; SI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT:    v_mov_b32_e32 v3, s1
+; SI-NEXT:    v_mov_b32_e32 v1, s2
+; SI-NEXT:    v_mov_b32_e32 v2, s0
+; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_select_uint_to_fp_i1_vals_i64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT:    v_mov_b32_e32 v0, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_cmp_eq_u32 s2, 0
+; VI-NEXT:    s_cselect_b32 s2, 0x3ff00000, 0
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
+  store i64 %select, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+define void @v_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_select_uint_to_fp_i1_vals_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x3ff00000
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
+  store i64 %select, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; TODO: This should swap the selected order / invert the compare and do it.
+define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; SI-LABEL: s_swap_select_uint_to_fp_i1_vals_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s2, s[4:5], 0x2
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT:    v_mov_b32_e32 v0, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_cmp_eq_u32 s2, 0
+; SI-NEXT:    s_cselect_b32 s2, 0, 0x3ff00000
+; SI-NEXT:    v_mov_b32_e32 v3, s1
+; SI-NEXT:    v_mov_b32_e32 v1, s2
+; SI-NEXT:    v_mov_b32_e32 v2, s0
+; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_swap_select_uint_to_fp_i1_vals_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT:    v_mov_b32_e32 v0, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_cmp_eq_u32 s2, 0
+; VI-NEXT:    s_cselect_b32 s2, 0, 0x3ff00000
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double 0.0, double 1.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
+
+define void @v_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_swap_select_uint_to_fp_i1_vals_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x3ff00000
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double 0.0, double 1.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
-- 
2.7.4