From 99e4eba669f13a0dc80880f4f91e2338377c1667 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 2 Mar 2013 14:59:06 +0100 Subject: [PATCH] nv50/ir: implement splitting of 64 bit ops after RA --- src/gallium/drivers/nv50/codegen/nv50_ir.h | 3 +- .../drivers/nv50/codegen/nv50_ir_build_util.cpp | 70 ++++++++++++++++++++++ .../drivers/nv50/codegen/nv50_ir_build_util.h | 4 ++ .../drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp | 27 +++------ .../drivers/nv50/codegen/nv50_ir_peephole.cpp | 2 + .../drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 31 ++++------ 6 files changed, 98 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h index dd7ff90..5481259 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h @@ -48,7 +48,7 @@ enum operation OP_MOV, // simple copy, no modifiers allowed OP_LOAD, OP_STORE, - OP_ADD, + OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds OP_SUB, OP_MUL, OP_DIV, @@ -707,6 +707,7 @@ public: inline void setFlagsSrc(int s, Value *); inline void setFlagsDef(int d, Value *); + inline bool usesFlags() const { return flagsSrc >= 0; } unsigned int defCount() const { return defs.size(); }; unsigned int defCount(unsigned int mask, bool singleFile = false) const; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp index 0af2c61..935af32 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp @@ -541,4 +541,74 @@ BuildUtil::DataArray::mkSymbol(int i, int c) return sym; } + +Instruction * +BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i, + Value *zero, + Value *carry) +{ + DataType hTy; + int srcNr; + + switch (i->dType) { + case TYPE_U64: hTy = TYPE_U32; break; + case TYPE_S64: hTy = TYPE_S32; break; + default: + return NULL; + } + + 
switch (i->op) { + case OP_MOV: srcNr = 1; break; + case OP_ADD: + case OP_SUB: + if (!carry) + return NULL; + srcNr = 2; + break; + default: + // TODO when needed + return NULL; + } + + i->setType(hTy); + i->setDef(0, cloneShallow(fn, i->getDef(0))); + i->getDef(0)->reg.size = 4; + Instruction *lo = i; + Instruction *hi = cloneForward(fn, i); + lo->bb->insertAfter(lo, hi); + + hi->getDef(0)->reg.data.id++; + + for (int s = 0; s < srcNr; ++s) { + if (lo->getSrc(s)->reg.size < 8) { + hi->setSrc(s, zero); + } else { + if (lo->getSrc(s)->refCount() > 1) + lo->setSrc(s, cloneShallow(fn, lo->getSrc(s))); + lo->getSrc(s)->reg.size /= 2; + hi->setSrc(s, cloneShallow(fn, lo->getSrc(s))); + + switch (hi->src(s).getFile()) { + case FILE_IMMEDIATE: + hi->getSrc(s)->reg.data.u64 >>= 32; + break; + case FILE_MEMORY_CONST: + case FILE_MEMORY_SHARED: + case FILE_SHADER_INPUT: + hi->getSrc(s)->reg.data.offset += 4; + break; + default: + assert(hi->src(s).getFile() == FILE_GPR); + hi->getSrc(s)->reg.data.id++; + break; + } + } + } + if (srcNr == 2) { + lo->setDef(1, carry); + hi->setFlagsSrc(hi->srcCount(), carry); + } + return hi; +} + } // namespace nv50_ir diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h index f48dbc2..dbb9c03 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h @@ -101,6 +101,10 @@ public: Value *loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); } + // returns high part of the operation + static Instruction *split64BitOpPostRA(Function *, Instruction *, + Value *zero, Value *carry); + struct Location { Location(unsigned array, unsigned arrayIdx, unsigned i, unsigned c) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp index 9c3f8f6..20f76f8 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp +++ 
b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp @@ -122,7 +122,6 @@ private: void handlePRERET(FlowInstruction *); void replaceZero(Instruction *); - void split64BitOp(Instruction *); LValue *r63; }; @@ -160,22 +159,6 @@ NV50LegalizePostRA::replaceZero(Instruction *i) } } -void -NV50LegalizePostRA::split64BitOp(Instruction *i) -{ - if (i->dType == TYPE_F64) { - if (i->op == OP_MAD) - i->op = OP_FMA; - if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA || - i->op == OP_CVT || i->op == OP_MIN || i->op == OP_MAX || - i->op == OP_SET) - return; - i->dType = i->sType = TYPE_U32; - - i->bb->insertAfter(i, cloneForward(func, i)); - } -} - // Emulate PRERET: jump to the target and call to the origin from there // // WARNING: atm only works if BBs are affected by at most a single PRERET @@ -229,12 +212,18 @@ NV50LegalizePostRA::visit(BasicBlock *bb) if (i->op == OP_PRERET && prog->getTarget()->getChipset() < 0xa0) { handlePRERET(i->asFlow()); } else { + // TODO: We will want to do this before register allocation, + // since we have to use a $c register for the carry flag. 
+ if (typeSizeof(i->dType) == 8) { + Instruction *hi = BuildUtil::split64BitOpPostRA(func, i, r63, NULL); + if (hi) + next = hi; + } + if (i->op != OP_MOV && i->op != OP_PFETCH && i->op != OP_BAR && (!i->defExists(0) || i->def(0).getFile() != FILE_ADDRESS)) replaceZero(i); - if (typeSizeof(i->dType) == 8) - split64BitOp(i); } } if (!bb->getEntry()) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp index 0af44ba..2926907 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp @@ -667,6 +667,8 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) } break; case OP_ADD: + if (i->usesFlags()) + break; if (imm0.isInteger(0)) { if (s == 0) { i->setSrc(0, i->getSrc(1)); diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index 94d3cea..414a503 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp @@ -128,7 +128,6 @@ private: virtual bool visit(BasicBlock *); void replaceZero(Instruction *); - void split64BitOp(Instruction *); bool tryReplaceContWithBra(BasicBlock *); void propagateJoin(BasicBlock *); @@ -158,6 +157,7 @@ private: private: LValue *rZero; + LValue *carry; const bool needTexBar; }; @@ -468,8 +468,10 @@ NVC0LegalizePostRA::visit(Function *fn) insertTextureBarriers(fn); rZero = new_LValue(fn, FILE_GPR); + carry = new_LValue(fn, FILE_FLAGS); rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR); + carry->reg.data.id = 0; return true; } @@ -486,22 +488,6 @@ NVC0LegalizePostRA::replaceZero(Instruction *i) } } -void -NVC0LegalizePostRA::split64BitOp(Instruction *i) -{ - if (i->dType == TYPE_F64) { - if (i->op == OP_MAD) - i->op = OP_FMA; - if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA || - i->op == OP_CVT || i->op == OP_MIN || 
i->op == OP_MAX || - i->op == OP_SET) - return; - i->dType = i->sType = TYPE_U32; - - i->bb->insertAfter(i, cloneForward(func, i)); - } -} - // replace CONT with BRA for single unconditional continue bool NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb) @@ -565,10 +551,17 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) if (i->isNop()) { bb->remove(i); } else { + // TODO: Move this to before register allocation for operations that + // need the $c register! + if (typeSizeof(i->dType) == 8) { + Instruction *hi; + hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry); + if (hi) + next = hi; + } + if (i->op != OP_MOV && i->op != OP_PFETCH) replaceZero(i); - if (typeSizeof(i->dType) == 8) - split64BitOp(i); } } if (!bb->getEntry()) -- 2.7.4