// VGPR registers
foreach Index = 0-255 in {
- // Set a cost value for vgprs other than the argument registers (v0-v31).
- // The ratio of index/allocation_granularity is taken as the cost value.
- // Considered the allocation granularity as 4 here.
- let CostPerUse=!if(!gt(Index, 31), !srl(Index, 2), 0) in {
-
// There is no special encoding for low 16 bit subreg; this is not a real
// register but rather an operand for instructions preserving high 16 bits
// of the result or reading just low 16 bits of a 32 bit VGPR.
let HWEncoding{8} = 1;
let SubRegIndices = [lo16, hi16];
}
- }
}
// AccVGPR registers
; GFX908-DAG: v_accvgpr_read_b32
; GCN: NumVgprs: 256
-; GFX900: ScratchSize: 708
+; GFX900: ScratchSize: 644
; GFX908-FIXME: ScratchSize: 0
; GCN: VGPRBlocks: 63
; GCN: NumVGPRsForWavesPerEU: 256
; OFFREG is offset system SGPR
; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
-; GCN: NumVgprs: 255
+; GCN: NumVgprs: 256
; GCN: ScratchSize: 1536
define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, [16 x <4 x i32>] addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {