Overallocating by 2 gprs for ugprs is a wild guess on my part. It does
make sense, though: each subgroup (32 threads) shares 64 ugprs, which
works out to 2 per thread.
Signed-off-by: Karol Herbst <git@karolherbst.de>
Reviewed-by: M Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24261>
prog->relocs = info_out.bin.relocData;
prog->fixups = info_out.bin.fixupData;
if (info_out.target >= NVISA_GV100_CHIPSET)
- prog->num_gprs = MIN2(info_out.bin.maxGPR + 5, 255); //XXX: why?
+ prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 3);
else
- prog->num_gprs = MAX2(4, (info_out.bin.maxGPR + 1));
+ prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 1);
prog->cp.smem_size = info_out.bin.smemSize;
prog->num_barriers = info_out.numBarriers;
unsigned int
TargetNVC0::getFileSize(DataFile file) const
{
- const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768;
const unsigned int bs = (chipset >= NVISA_GV100_CHIPSET) ? 16 : 0;
+ unsigned int gprs;
+
+ /* probably because of ugprs? */
+ if (chipset >= NVISA_GV100_CHIPSET)
+ gprs = 253;
+ else if (chipset >= NVISA_GK20A_CHIPSET)
+ gprs = 255;
+ else
+ gprs = 63;
+
switch (file) {
case FILE_NULL: return 0;
case FILE_GPR: return MIN2(gprs, smregs / threads);