ret void
}
-; FIXME: x16 inlineasm seems broken
-; define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
-; %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
-; %cmp = icmp eq i32 %in, 0
-; br i1 %cmp, label %bb0, label %ret
-
-; bb0:
-; call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
-; br label %ret
-
-; ret:
-; ret void
-; }
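+; Spill a <16 x i32> SGPR tuple across a branch: the VGPR checks expect each
+; of the 16 dwords to be saved to a VGPR lane with v_writelane_b32 and
+; restored with v_readlane_b32, while the VMEM checks expect each dword to be
+; spilled to scratch with buffer_store_dword and reloaded with
+; buffer_load_dword.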
+; ALL-LABEL: {{^}}spill_sgpr_x16:
+
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
+; VGPR: s_cbranch_scc1
+
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
+
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: s_cbranch_scc1
+
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
+ %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+ %cmp = icmp eq i32 %in, 0
+ br i1 %cmp, label %bb0, label %ret
+
+bb0:
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
+ br label %ret
+
+ret:
+ ret void
+}
+
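+; Same as spill_sgpr_x16 but for a <32 x i32> SGPR tuple: 32
+; v_writelane_b32/v_readlane_b32 pairs under the VGPR checks, or 32
+; buffer_store_dword/buffer_load_dword pairs under the VMEM checks.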
+; ALL-LABEL: {{^}}spill_sgpr_x32:
+
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 16
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 17
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 18
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 19
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 20
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 21
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 22
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 23
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 24
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 25
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 26
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 27
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 28
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 29
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 30
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 31
+; VGPR: s_cbranch_scc1
+
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 16
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 17
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 18
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 19
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 20
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 21
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 22
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 23
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 24
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 25
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 26
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 27
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 28
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 29
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 30
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 31
+
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: s_cbranch_scc1
+
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+define amdgpu_kernel void @spill_sgpr_x32(i32 addrspace(1)* %out, i32 %in) #0 {
+ %wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
+ %cmp = icmp eq i32 %in, 0
+ br i1 %cmp, label %bb0, label %ret
+
+bb0:
+ call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) #0
+ br label %ret
+
+ret:
+ ret void
+}
attributes #0 = { nounwind }