From ae59a7d35d2b6f23634617dc91e1baf85c9d6d81 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 9 Mar 2013 12:11:38 +0100 Subject: [PATCH] nvc0: they removed the NTID,NCTAID,GRIDID registers on nve4 --- src/gallium/drivers/nv50/codegen/nv50_ir_driver.h | 1 + .../drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 25 +++++++++++++++-- .../drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp | 4 +++ src/gallium/drivers/nvc0/nvc0_program.c | 1 + src/gallium/drivers/nvc0/nve4_compute.c | 32 +++++++++++++++------- src/gallium/drivers/nvc0/nve4_compute.h | 26 ++++++++++-------- 6 files changed, 66 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h index 7bdd4b9..c5a5b23 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h @@ -164,6 +164,7 @@ struct nv50_ir_prog_info struct { uint32_t inputOffset; /* base address for user args */ uint32_t sharedOffset; /* reserved space in s[] */ + uint32_t gridInfoBase; /* base address for NTID,NCTAID */ } cp; } prop; diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index a82465a..5f0f2e7 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp @@ -1304,14 +1304,22 @@ bool NVC0LoweringPass::handleRDSV(Instruction *i) { Symbol *sym = i->getSrc(0)->asSym(); + const SVSemantic sv = sym->reg.data.sv.sv; Value *vtx = NULL; Instruction *ld; uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); - if (addr >= 0x400) // mov $sreg + if (addr >= 0x400) { + // mov $sreg + if (sym->reg.data.sv.index == 3) { + // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID + i->op = OP_MOV; + i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0)); + } return true; + } - switch (i->getSrc(0)->reg.data.sv.sv) { + switch (sv) { case SV_POSITION: assert(prog->getType() == Program::TYPE_FRAGMENT); bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL); @@ -1330,6 +1338,19 @@ NVC0LoweringPass::handleRDSV(Instruction *i) assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL); readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index); break; + case SV_NTID: + case SV_NCTAID: + case SV_GRIDID: + assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise + if (sym->reg.data.sv.index == 3) { + i->op = OP_MOV; + i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); + return true; + } + addr += prog->driver->prop.cp.gridInfoBase; + bld.mkLoad(TYPE_U32, i->getDef(0), + bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL); + break; default: if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp index 3aa29e2..7f1ac5d 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp @@ -260,6 +260,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const const SVSemantic sv = sym->reg.data.sv.sv; const bool isInput = shaderFile == FILE_SHADER_INPUT; + const bool kepler = getChipset() >= NVISA_GK104_CHIPSET; switch (sv) { case SV_POSITION: return 0x070 + idx * 4; @@ -274,6 +275,9 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const case SV_FACE: return 0x3fc; case SV_TESS_FACTOR: return 0x000 + idx * 4; case SV_TESS_COORD: return 0x2f0 + idx * 4; + case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0; + case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0; + case SV_GRIDID: return kepler ? 0x18 : ~0; default: return 0xffffffff; } diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 592d338..2f4eae8 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -573,6 +573,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) info->io.resInfoCBSlot = 0; info->io.texBindBase = NVE4_CP_INPUT_TEX(0); info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); + info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); } info->io.msInfoCBSlot = 0; info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; diff --git a/src/gallium/drivers/nvc0/nve4_compute.c b/src/gallium/drivers/nvc0/nve4_compute.c index e823d21..b5344e4 100644 --- a/src/gallium/drivers/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nvc0/nve4_compute.c @@ -335,24 +335,36 @@ nve4_compute_state_validate(struct nvc0_context *nvc0) static void -nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input) +nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input, + const uint *block_layout, + const uint *grid_layout) { struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; - if (!cp->parm_size) - return; - + if (cp->parm_size) { + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->parm->offset); + PUSH_DATA (push, screen->parm->offset); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2); + PUSH_DATA (push, cp->parm_size); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4)); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA); + PUSH_DATAp(push, input, cp->parm_size / 4); + } BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->parm->offset); - PUSH_DATA (push, screen->parm->offset); + PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); + PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2); - PUSH_DATA (push, cp->parm_size); + PUSH_DATA (push, 7 * 4); PUSH_DATA (push, 0x1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4)); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA); - PUSH_DATAp(push, input, cp->parm_size / 4); + PUSH_DATAp(push, block_layout, 3); + PUSH_DATAp(push, grid_layout, 3); + PUSH_DATA (push, 0); BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); @@ -448,7 +460,7 @@ nve4_launch_grid(struct pipe_context *pipe, nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout); nve4_compute_dump_launch_desc(desc); - nve4_compute_upload_input(nvc0, input); + nve4_compute_upload_input(nvc0, input, block_layout, grid_layout); /* upload descriptor and flush */ #if 0 diff --git a/src/gallium/drivers/nvc0/nve4_compute.h b/src/gallium/drivers/nvc0/nve4_compute.h index 82a7748..11d8928 100644 --- a/src/gallium/drivers/nvc0/nve4_compute.h +++ b/src/gallium/drivers/nvc0/nve4_compute.h @@ -7,17 +7,21 @@ /* Input space is implemented as c0[], to which we bind the screen->parm bo. */ -#define NVE4_CP_INPUT_USER 0x0000 -#define NVE4_CP_INPUT_USER_LIMIT 0x1000 -#define NVE4_CP_INPUT_TEX(i) (0x1020 + (i) * 4) -#define NVE4_CP_INPUT_TEX_STRIDE 4 -#define NVE4_CP_INPUT_TEX_MAX 32 -#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0 -#define NVE4_CP_INPUT_SUF_STRIDE 64 -#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE) -#define NVE4_CP_INPUT_SUF_MAX 32 -#define NVE4_CP_INPUT_SIZE 0x1900 -#define NVE4_CP_PARAM_SIZE 0x2000 +#define NVE4_CP_INPUT_USER 0x0000 +#define NVE4_CP_INPUT_USER_LIMIT 0x1000 +#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4) +#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4) +#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4) +#define NVE4_CP_INPUT_GRIDID 0x1018 +#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4) +#define NVE4_CP_INPUT_TEX_STRIDE 4 +#define NVE4_CP_INPUT_TEX_MAX 32 +#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0 +#define NVE4_CP_INPUT_SUF_STRIDE 64 +#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE) +#define NVE4_CP_INPUT_SUF_MAX 32 +#define NVE4_CP_INPUT_SIZE 0x1900 +#define NVE4_CP_PARAM_SIZE 0x2000 struct nve4_cp_launch_desc { -- 2.7.4