From b473fcc9a39d0e1cdc1b9d2cd34ea8da2f9fc382 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 20 Jul 2018 15:56:21 +0100 Subject: [PATCH] nvc0: fix bindless multisampled images on Maxwell+ NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+ and it's too small anyway. With these changes, TXQ is used to determine the number of samples and the coordinate adjustment information is calculated from that sample count in the shader. v2: rework to use TXQ and a small array instead of a larger array with an entry for each texture v3: get rid of the small array and calculate the adjustments in the shader Signed-off-by: Rhys Perry Fixes: c2ae9b40527 ('nvc0: implement multisampled images on Maxwell+') Reviewed-by: Ilia Mirkin --- .../nouveau/codegen/nv50_ir_lowering_gm107.cpp | 4 +- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 43 +++++++++++++++++++++- .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 3 +- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index c7436e2..49a5f3b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq) if (mask & 0x1) bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless)); + loadMsAdjInfo32(suq->tex.target, 0, slot, ind, suq->tex.bindless)); if (mask & 0x2) { int d = util_bitcount(mask & 0x1); bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d), - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless)); + loadMsAdjInfo32(suq->tex.target, 1, slot, ind, suq->tex.bindless)); } } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 176e0cf..0cbf4d0 100644 --- 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1732,6 +1732,45 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless prog->driver->io.suInfoBase); } +Value * +NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless) +{ + if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET) + return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless); + + assert(bindless); + + Value *samples = bld.getSSA(); + // this shouldn't be lowered because it's being inserted before the current instruction + TexInstruction *tex = new_TexInstruction(func, OP_TXQ); + tex->tex.target = target; + tex->tex.query = TXQ_TYPE; + tex->tex.mask = 0x4; + tex->tex.r = 0xff; + tex->tex.s = 0x1f; + tex->tex.rIndirectSrc = 0; + tex->setDef(0, samples); + tex->setSrc(0, ind); + tex->setSrc(1, bld.loadImm(NULL, 0)); + bld.insert(tex); + + // shift amount derived from the sample count, as (index 0, index 1): 1 -> (0, 0), 2 -> (1, 0), 4 -> (1, 1), 8 -> (2, 1); doesn't work with other sample counts but they aren't supported + switch (index) { + case 0: { + Value *tmp = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), samples, bld.mkImm(2)); + return bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(2)); + } + case 1: { + Value *tmp = bld.mkCmp(OP_SET, CC_GT, TYPE_U32, bld.getSSA(), TYPE_U32, samples, bld.mkImm(2))->getDef(0); + return bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(1)); + } + default: { + assert(false); + return NULL; + } + } +} + static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) { switch (su->tex.target.getEnum()) { @@ -1817,8 +1856,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); Value *ind = tex->getIndirectR(); - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless); - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless); + Value *ms_x = 
loadMsAdjInfo32(tex->tex.target, 0, slot, ind, tex->tex.bindless); + Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, tex->tex.bindless); bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 5dbb3e4..4136b1ec 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -148,7 +148,7 @@ protected: void handlePIXLD(Instruction *); void checkPredicate(Instruction *); - Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); + Value *loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless); virtual bool visit(Instruction *); @@ -161,6 +161,7 @@ private: Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base); Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base); Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base); + Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); Value *loadBufInfo64(Value *ptr, uint32_t off); Value *loadBufLength32(Value *ptr, uint32_t off); Value *loadUboInfo64(Value *ptr, uint32_t off); -- 2.7.4