ret void
}
-; FUNC-LABEL: {{^}}div_afn_2_x_pat_f16:
+; SI-LABEL: {{^}}div_afn_2_x_pat_f16:
; SI: v_mul_f32_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
; GFX8_9_10: v_mul_f16_e32 [[MUL:v[0-9]+]], 0.5, v{{[0-9]+}}
ret void
}
-; FUNC-LABEL: {{^}}div_afn_k_x_pat_f16:
+; SI-LABEL: {{^}}div_afn_k_x_pat_f16:
; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dcccccd, v{{[0-9]+}}
; GFX8_9_10: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}
ret void
}
-; FUNC-LABEL: {{^}}div_afn_neg_k_x_pat_f16:
+; SI-LABEL: {{^}}div_afn_neg_k_x_pat_f16:
; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdcccccd, v{{[0-9]+}}
; GFX8_9_10: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN-UNSAFE %s
-; FUNC-LABEL: {{^}}fptrunc_f64_to_f32:
+; GCN-LABEL: {{^}}fptrunc_f64_to_f32:
; GCN: v_cvt_f32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
; Truncates the double kernel argument %in to float and writes the result
; through %out. The store keeps the conversion live; with %result unused the
; fptrunc is dead and no conversion instruction would be selected.
define amdgpu_kernel void @fptrunc_f64_to_f32(float addrspace(1)* %out, double %in) {
  %result = fptrunc double %in to float
  store float %result, float addrspace(1)* %out
  ret void
}
-; FUNC-LABEL: {{^}}fptrunc_f64_to_f16:
+; GCN-LABEL: {{^}}fptrunc_f64_to_f16:
; GCN-NOT: v_cvt
; GCN-UNSAFE: v_cvt_f32_f64_e32 [[F32:v[0-9]+]]
; GCN-UNSAFE: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[F32]]
ret void
}
-; FUNC-LABEL: {{^}}fptrunc_v2f64_to_v2f32:
+; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f32:
; GCN: v_cvt_f32_f64_e32
; GCN: v_cvt_f32_f64_e32
; Truncates the <2 x double> kernel argument %in to <2 x float> and writes the
; result through %out. An empty body produces no conversions at all, so the
; two conversion checks that precede this function could never match.
define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x double> %in) {
  %result = fptrunc <2 x double> %in to <2 x float>
  store <2 x float> %result, <2 x float> addrspace(1)* %out
  ret void
}
-; FUNC-LABEL: {{^}}fptrunc_v3f64_to_v3f32:
+; GCN-LABEL: {{^}}fptrunc_v3f64_to_v3f32:
; GCN: v_cvt_f32_f64_e32
; GCN: v_cvt_f32_f64_e32
; GCN: v_cvt_f32_f64_e32
ret void
}
-; FUNC-LABEL: {{^}}fptrunc_v4f64_to_v4f32:
+; GCN-LABEL: {{^}}fptrunc_v4f64_to_v4f32:
; GCN: v_cvt_f32_f64_e32
; GCN: v_cvt_f32_f64_e32
; GCN: v_cvt_f32_f64_e32
ret void
}
-; FUNC-LABEL: {{^}}fptrunc_v8f64_to_v8f32:
+; GCN-LABEL: {{^}}fptrunc_v8f64_to_v8f32:
; GCN: v_cvt_f32_f64_e32
; GCN: v_cvt_f32_f64_e32
; GCN: v_cvt_f32_f64_e32
declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0
-; FUNC-LABEL: {{^}}ds_bpermute:
+; CHECK-LABEL: {{^}}ds_bpermute:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
-; FUNC-LABEL: {{^}}ds_ordered_add:
+; GCN-LABEL: {{^}}ds_ordered_add:
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
ret void
}
-; FUNC-LABEL: {{^}}ds_ordered_add_4dw:
+; GCN-LABEL: {{^}}ds_ordered_add_4dw:
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:49924 gds
declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0
-; FUNC-LABEL: {{^}}ds_swizzle:
+; CHECK-LABEL: {{^}}ds_swizzle:
; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11")
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind {
%swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
declare i32 @llvm.amdgcn.sffbh.i32(i32) #1
-; FUNC-LABEL: {{^}}s_flbit:
+; GCN-LABEL: {{^}}s_flbit:
; GCN: s_load_dword [[VAL:s[0-9]+]],
; GCN: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]]
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
ret void
}
-; FUNC-LABEL: {{^}}v_flbit:
+; GCN-LABEL: {{^}}v_flbit:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
; Test with inline immediate
-; FUNC-LABEL: {{^}}shl_2_add_9_i32:
+; SI-LABEL: {{^}}shl_2_add_9_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
; SI: buffer_store_dword [[RESULT]]
ret void
}
-; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
+; SI-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], vcc, 9, {{v[0-9]+}}
; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
; SI-DAG: buffer_store_dword [[ADDREG]]
; Test with add literal constant
-; FUNC-LABEL: {{^}}shl_2_add_999_i32:
+; SI-LABEL: {{^}}shl_2_add_999_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
; SI: buffer_store_dword [[RESULT]]
ret void
}
-; FUNC-LABEL: {{^}}test_add_shl_add_constant:
+; SI-LABEL: {{^}}test_add_shl_add_constant:
; SI-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Y:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_lshl_b32 [[SHL3:s[0-9]+]], s[[X]], 3
; SI: s_add_i32 [[RESULT:s[0-9]+]], [[SHL3]], s[[Y]]
ret void
}
-; FUNC-LABEL: {{^}}test_add_shl_add_constant_inv:
+; SI-LABEL: {{^}}test_add_shl_add_constant_inv:
; SI-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Y:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
; SI: s_lshl_b32 [[SHL3:s[0-9]+]], s[[X]], 3
; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], s[[Y]]
ret void
}
-; FUNC-LABEL: {{^}}s_sint_to_fp_i1_to_f16:
+; GCN-LABEL: {{^}}s_sint_to_fp_i1_to_f16:
; GCN-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 1.0, {{v[0-9]+}}
; GCN-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 0, {{v[0-9]+}}
; GCN: s_xor_b64 [[R_CMP:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP0]]
ret void
}
-; FUNC-LABEL: {{^}}s_uint_to_fp_i1_to_f16:
+; GCN-LABEL: {{^}}s_uint_to_fp_i1_to_f16:
; GCN-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 1.0, {{v[0-9]+}}
; GCN-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 0, {{v[0-9]+}}
; GCN: s_xor_b64 [[R_CMP:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP0]]
ret void
}
-; FUNC-LABEL: {{^}}local_unaligned_load_store_i32:
+; SI-LABEL: {{^}}local_unaligned_load_store_i32:
; SI: ds_read_u8
; SI: ds_read_u8
ret void
}
-; FUNC-LABEL: {{^}}local_align2_load_store_i32:
+; GCN-LABEL: {{^}}local_align2_load_store_i32:
; GCN: ds_read_u16
; GCN: ds_read_u16
; GCN: ds_write_b16
ret void
}
-; FUNC-LABEL: {{^}}local_unaligned_load_store_i64:
+; SI-LABEL: {{^}}local_unaligned_load_store_i64:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
ret void
}
-; FUNC-LABEL: {{^}}local_unaligned_load_store_v4i32:
+; GCN-LABEL: {{^}}local_unaligned_load_store_v4i32:
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
ret void
}
-; FUNC-LABEL: {{^}}local_load_i64_align_4:
+; GCN-LABEL: {{^}}local_load_i64_align_4:
; GCN: ds_read2_b32
; Loads an i64 from the addrspace(3) pointer %in with only 4-byte alignment
; and copies it to the addrspace(1) pointer %out. The store gives the load a
; use; an unused non-volatile load is dead and would be removed before
; instruction selection.
define amdgpu_kernel void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
-; FUNC-LABEL: {{^}}local_load_i64_align_4_with_offset
+; GCN-LABEL: {{^}}local_load_i64_align_4_with_offset
; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
; Loads an i64 located four i64 elements (32 bytes) past the addrspace(3)
; pointer %in, using only 4-byte alignment, and copies it to %out. The load
; and store give the address computation a use; on its own the getelementptr
; is dead and no memory access would be emitted.
define amdgpu_kernel void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
  %val = load i64, i64 addrspace(3)* %ptr, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
-; FUNC-LABEL: {{^}}local_load_i64_align_4_with_split_offset:
+; GCN-LABEL: {{^}}local_load_i64_align_4_with_split_offset:
; Tests the case where the lo offset is 8 bits, but the hi offset is 9 bits.
; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; GCN: s_endpgm
ret void
}
-; FUNC-LABEL: {{^}}local_load_i64_align_1:
+; GCN-LABEL: {{^}}local_load_i64_align_1:
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
ret void
}
-; FUNC-LABEL: {{^}}local_store_i64_align_4:
+; GCN-LABEL: {{^}}local_store_i64_align_4:
; GCN: ds_write2_b32
; Stores the i64 kernel argument %val through the addrspace(3) pointer %out
; with only 4-byte alignment declared on the store.
define amdgpu_kernel void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
store i64 %val, i64 addrspace(3)* %out, align 4
ret void
}
-; FUNC-LABEL: {{^}}local_store_i64_align_4_with_offset
+; GCN-LABEL: {{^}}local_store_i64_align_4_with_offset
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; GCN: s_endpgm
; Stores an i64 four i64 elements (32 bytes) past the addrspace(3) pointer
; %out with only 4-byte alignment. 32 bytes corresponds to the dword offsets
; 8 and 9 named in the check line above this function. The function has no
; value argument, so a constant zero is stored; an empty body would emit no
; write at all.
define amdgpu_kernel void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
  store i64 0, i64 addrspace(3)* %ptr, align 4
  ret void
}
-; FUNC-LABEL: {{^}}local_store_i64_align_4_with_split_offset:
+; GCN-LABEL: {{^}}local_store_i64_align_4_with_split_offset:
; Tests the case where the lo offset is 8 bits, but the hi offset is 9 bits.
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; GCN: s_endpgm