From: Karol Herbst Date: Thu, 20 Jul 2023 13:38:13 +0000 (+0200) Subject: nvc0: fix num_gprs for Volta+ X-Git-Tag: upstream/23.3.3~5263 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=23795dc318a0df9e233123b76c1d61506a6289ce;p=platform%2Fupstream%2Fmesa.git nvc0: fix num_gprs for Volta+ Overallocating by 2 gprs for ugprs is a wild guess by me. It does make sense though as each subgroup shares 64 ugprs and that's 2 per thread. Signed-off-by: Karol Herbst Reviewed-by: M Henning Part-of: --- diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 63f781b..33edd13 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -686,9 +686,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, prog->relocs = info_out.bin.relocData; prog->fixups = info_out.bin.fixupData; if (info_out.target >= NVISA_GV100_CHIPSET) - prog->num_gprs = MIN2(info_out.bin.maxGPR + 5, 255); //XXX: why? + prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 3); else - prog->num_gprs = MAX2(4, (info_out.bin.maxGPR + 1)); + prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 1); prog->cp.smem_size = info_out.bin.smemSize; prog->num_barriers = info_out.numBarriers; diff --git a/src/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/nouveau/codegen/nv50_ir_target_nvc0.cpp index da6c37c..4755390 100644 --- a/src/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -265,9 +265,18 @@ void TargetNVC0::initOpInfo() unsigned int TargetNVC0::getFileSize(DataFile file) const { - const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63; const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768; const unsigned int bs = (chipset >= NVISA_GV100_CHIPSET) ? 16 : 0; + unsigned int gprs; + + /* probably because of ugprs? */ + if (chipset >= NVISA_GV100_CHIPSET) + gprs = 253; + else if (chipset >= NVISA_GK20A_CHIPSET) + gprs = 255; + else + gprs = 63; + switch (file) { case FILE_NULL: return 0; case FILE_GPR: return MIN2(gprs, smregs / threads);