From 656ad060518d067a3b311db8c2de2a396fb41898 Mon Sep 17 00:00:00 2001 From: Boyan Ding Date: Thu, 9 Mar 2017 13:55:19 +0800 Subject: [PATCH] gk110/ir: Use the new rcp/rsq in library v2: (Karol Herbst) * fix Value setup for the builtins Signed-off-by: Boyan Ding [imirkin: track the fp64 flag when switching ops to calls] Signed-off-by: Ilia Mirkin Cc: 19.0 --- src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 38 ++++++++++++++++++++++ .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + .../drivers/nouveau/codegen/nv50_ir_target.cpp | 1 + 5 files changed, 42 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 49425b9..993d01c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1119,6 +1119,7 @@ Program::Program(Type type, Target *arch) binSize = 0; maxGPR = -1; + fp64 = false; main = new Function(this, "MAIN", ~0); calls.insert(&main->call); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 8085bb2..8d32a25 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -1311,6 +1311,7 @@ public: uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL int maxGPR; + bool fp64; MemoryPool mem_Instruction; MemoryPool mem_CmpInstruction; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 34cb7dc..65b26dc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -84,6 +84,38 @@ NVC0LegalizeSSA::handleDIV(Instruction *i) } void +NVC0LegalizeSSA::handleRCPRSQLib(Instruction *i, Value *src[]) +{ + FlowInstruction *call; + Value *def[2]; + int 
builtin; + + def[0] = bld.mkMovToReg(0, src[0])->getDef(0); + def[1] = bld.mkMovToReg(1, src[1])->getDef(0); + + if (i->op == OP_RCP) + builtin = NVC0_BUILTIN_RCP_F64; + else + builtin = NVC0_BUILTIN_RSQ_F64; + + call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); + def[0] = bld.getSSA(); + def[1] = bld.getSSA(); + bld.mkMovFromReg(def[0], 0); + bld.mkMovFromReg(def[1], 1); + bld.mkClobber(FILE_GPR, 0x3fc, 2); + bld.mkClobber(FILE_PREDICATE, i->op == OP_RSQ ? 0x3 : 0x1, 0); + bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), def[0], def[1]); + + call->fixed = 1; + call->absolute = call->builtin = 1; + call->target.builtin = builtin; + delete_Instruction(prog, i); + + prog->fp64 = true; +} + +void NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) { assert(i->dType == TYPE_F64); @@ -96,6 +128,12 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) Value *src[2], *dst[2], *def = i->getDef(0); bld.mkSplit(src, 4, i->getSrc(0)); + int chip = prog->getTarget()->getChipset(); + if (chip >= NVISA_GK20A_CHIPSET && chip < NVISA_GM107_CHIPSET) { + handleRCPRSQLib(i, src); + return; + } + // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. 
dst[0] = bld.loadImm(NULL, 0); dst[1] = bld.getSSA(); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 4679c56..0ce2a4b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -62,6 +62,7 @@ private: // we want to insert calls to the builtin library only after optimization void handleDIV(Instruction *); // integer division, modulus + void handleRCPRSQLib(Instruction *, Value *[]); void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt void handleFTZ(Instruction *); void handleSET(CmpInstruction *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 9193a01..5c6d057 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -399,6 +399,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info) } } } + info->io.fp64 |= fp64; info->bin.relocData = emit->getRelocInfo(); info->bin.fixupData = emit->getFixupInfo(); -- 2.7.4