From f4b29f3bf79bd64c7b4855d5f9fde69eb224244a Mon Sep 17 00:00:00 2001 From: GregF Date: Mon, 3 Jul 2017 17:23:04 -0600 Subject: [PATCH] Add CommonUniformElim pass - UniformElim: Only process reachable blocks - UniformElim: Don't reuse loads of samplers and images across blocks. Added a second phase which only reuses loads within a block for samplers and images. - UniformElim: Upgrade CopyObject skipping in GetPtr - UniformElim: Add extensions whitelist Currently disallowing SPV_KHR_variable_pointers because it doesn't handle extended pointer forms. - UniformElim: Do not process shaders with GroupDecorate - UniformElim: Bail on shaders with non-32-bit ints. - UniformElim: Document support for only single index and add TODO. --- include/spirv-tools/optimizer.hpp | 26 + source/opt/CMakeLists.txt | 2 + source/opt/common_uniform_elim_pass.cpp | 626 ++++++++++++++++++++++ source/opt/common_uniform_elim_pass.h | 214 ++++++++ source/opt/optimizer.cpp | 5 + source/opt/passes.h | 1 + test/opt/CMakeLists.txt | 5 + test/opt/common_uniform_elim_test.cpp | 673 ++++++++++++++++++++++++ tools/opt/opt.cpp | 8 + 9 files changed, 1560 insertions(+) create mode 100644 source/opt/common_uniform_elim_pass.cpp create mode 100644 source/opt/common_uniform_elim_pass.h create mode 100644 test/opt/common_uniform_elim_test.cpp diff --git a/include/spirv-tools/optimizer.hpp b/include/spirv-tools/optimizer.hpp index b52de8fe..a06b8f49 100644 --- a/include/spirv-tools/optimizer.hpp +++ b/include/spirv-tools/optimizer.hpp @@ -341,6 +341,20 @@ Optimizer::PassToken CreateInsertExtractElimPass(); // possible. Optimizer::PassToken CreateDeadBranchElimPass(); +// Creates a pass to consolidate uniform references. +// For each entry point function in the module, first change all constant index +// access chain loads into equivalent composite extracts. Then consolidate +// identical uniform loads into one uniform load. Finally, consolidate +// identical uniform extracts into one uniform extract. 
This may require +// moving a load or extract to a point which dominates all uses. +// +// This pass requires a module to have structured control flow ie shader +// capability. It also requires logical addressing ie Addresses capability +// is not enabled. It also currently does not support any extensions. +// +// This pass currently only optimizes loads with a single index. +Optimizer::PassToken CreateCommonUniformElimPass(); + // Create aggressive dead code elimination pass // This pass eliminates unused code from functions. In addition, // it detects and eliminates code which may have spurious uses but which do @@ -362,6 +376,18 @@ Optimizer::PassToken CreateDeadBranchElimPass(); // eliminated with standard dead code elimination. Optimizer::PassToken CreateAggressiveDCEPass(); +// Creates a pass to consolidate uniform references. +// For each entry point function in the module, first change all constant index +// access chain loads into equivalent composite extracts. Then consolidate +// identical uniform loads into one uniform load. Finally, consolidate +// identical uniform extracts into one uniform extract. This may require +// moving a load or extract to a point which dominates all uses. +// +// This pass requires a module to have structured control flow ie shader +// capability. It also requires logical addressing ie Addresses capability +// is not enabled. It also currently does not support any extensions. +Optimizer::PassToken CreateCommonUniformElimPass(); + // Creates a compact ids pass. // The pass remaps result ids to a compact and gapless range starting from %1. 
Optimizer::PassToken CreateCompactIdsPass(); diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt index de58f1a5..856a880b 100644 --- a/source/opt/CMakeLists.txt +++ b/source/opt/CMakeLists.txt @@ -16,6 +16,7 @@ add_library(SPIRV-Tools-opt basic_block.h block_merge_pass.h build_module.h + common_uniform_elim_pass.h compact_ids_pass.h constants.h dead_branch_elim_pass.h @@ -51,6 +52,7 @@ add_library(SPIRV-Tools-opt basic_block.cpp block_merge_pass.cpp build_module.cpp + common_uniform_elim_pass.cpp compact_ids_pass.cpp def_use_manager.cpp dead_branch_elim_pass.cpp diff --git a/source/opt/common_uniform_elim_pass.cpp b/source/opt/common_uniform_elim_pass.cpp new file mode 100644 index 00000000..6872fff2 --- /dev/null +++ b/source/opt/common_uniform_elim_pass.cpp @@ -0,0 +1,626 @@ +// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2017 Valve Corporation +// Copyright (c) 2017 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "common_uniform_elim_pass.h" + +#include "cfa.h" +#include "iterator.h" + +namespace spvtools { +namespace opt { + +namespace { + +const uint32_t kEntryPointFunctionIdInIdx = 1; +const uint32_t kAccessChainPtrIdInIdx = 0; +const uint32_t kTypePointerStorageClassInIdx = 0; +const uint32_t kTypePointerTypeIdInIdx = 1; +const uint32_t kConstantValueInIdx = 0; +const uint32_t kExtractCompositeIdInIdx = 0; +const uint32_t kExtractIdx0InIdx = 1; +const uint32_t kSelectionMergeMergeBlockIdInIdx = 0; +const uint32_t kLoopMergeMergeBlockIdInIdx = 0; +const uint32_t kLoopMergeContinueBlockIdInIdx = 1; +const uint32_t kStorePtrIdInIdx = 0; +const uint32_t kLoadPtrIdInIdx = 0; +const uint32_t kCopyObjectOperandInIdx = 0; +const uint32_t kTypeIntWidthInIdx = 0; + +} // anonymous namespace + +bool CommonUniformElimPass::IsNonPtrAccessChain(const SpvOp opcode) const { + return opcode == SpvOpAccessChain || opcode == SpvOpInBoundsAccessChain; +} + +bool CommonUniformElimPass::IsSamplerOrImageType( + const ir::Instruction* typeInst) const { + switch (typeInst->opcode()) { + case SpvOpTypeSampler: + case SpvOpTypeImage: + case SpvOpTypeSampledImage: + return true; + default: + break; + } + if (typeInst->opcode() != SpvOpTypeStruct) + return false; + // Return true if any member is a sampler or image + int samplerOrImageCnt = 0; + typeInst->ForEachInId([&samplerOrImageCnt, this](const uint32_t* tid) { + const ir::Instruction* compTypeInst = def_use_mgr_->GetDef(*tid); + if (IsSamplerOrImageType(compTypeInst)) ++samplerOrImageCnt; + }); + return samplerOrImageCnt > 0; +} + +bool CommonUniformElimPass::IsSamplerOrImageVar( + uint32_t varId) const { + const ir::Instruction* varInst = def_use_mgr_->GetDef(varId); + assert(varInst->opcode() == SpvOpVariable); + const uint32_t varTypeId = varInst->type_id(); + const ir::Instruction* varTypeInst = def_use_mgr_->GetDef(varTypeId); + const uint32_t varPteTypeId = + varTypeInst->GetSingleWordInOperand(kTypePointerTypeIdInIdx); + 
ir::Instruction* varPteTypeInst = def_use_mgr_->GetDef(varPteTypeId); + return IsSamplerOrImageType(varPteTypeInst); +} + +bool CommonUniformElimPass::IsLoopHeader(ir::BasicBlock* block_ptr) { + auto iItr = block_ptr->tail(); + if (iItr == block_ptr->begin()) + return false; + --iItr; + return iItr->opcode() == SpvOpLoopMerge; +} + +uint32_t CommonUniformElimPass::MergeBlockIdIfAny(const ir::BasicBlock& blk, + uint32_t* cbid) { + auto merge_ii = blk.cend(); + --merge_ii; + *cbid = 0; + uint32_t mbid = 0; + if (merge_ii != blk.cbegin()) { + --merge_ii; + if (merge_ii->opcode() == SpvOpLoopMerge) { + mbid = merge_ii->GetSingleWordInOperand(kLoopMergeMergeBlockIdInIdx); + *cbid = merge_ii->GetSingleWordInOperand(kLoopMergeContinueBlockIdInIdx); + } + else if (merge_ii->opcode() == SpvOpSelectionMerge) { + mbid = merge_ii->GetSingleWordInOperand(kSelectionMergeMergeBlockIdInIdx); + } + } + return mbid; +} + +ir::Instruction* CommonUniformElimPass::GetPtr( + ir::Instruction* ip, uint32_t* varId) { + const SpvOp op = ip->opcode(); + assert(op == SpvOpStore || op == SpvOpLoad); + *varId = ip->GetSingleWordInOperand( + op == SpvOpStore ? 
kStorePtrIdInIdx : kLoadPtrIdInIdx); + ir::Instruction* ptrInst = def_use_mgr_->GetDef(*varId); + while (ptrInst->opcode() == SpvOpCopyObject) { + *varId = ptrInst->GetSingleWordInOperand(kCopyObjectOperandInIdx); + ptrInst = def_use_mgr_->GetDef(*varId); + } + ir::Instruction* varInst = ptrInst; + while (varInst->opcode() != SpvOpVariable) { + if (IsNonPtrAccessChain(varInst->opcode())) { + *varId = varInst->GetSingleWordInOperand(kAccessChainPtrIdInIdx); + } + else { + assert(varInst->opcode() == SpvOpCopyObject); + *varId = varInst->GetSingleWordInOperand(kCopyObjectOperandInIdx); + } + varInst = def_use_mgr_->GetDef(*varId); + } + return ptrInst; +} + +bool CommonUniformElimPass::IsUniformVar(uint32_t varId) { + const ir::Instruction* varInst = + def_use_mgr_->id_to_defs().find(varId)->second; + assert(varInst->opcode() == SpvOpVariable); + const uint32_t varTypeId = varInst->type_id(); + const ir::Instruction* varTypeInst = + def_use_mgr_->id_to_defs().find(varTypeId)->second; + return varTypeInst->GetSingleWordInOperand(kTypePointerStorageClassInIdx) == + SpvStorageClassUniform || + varTypeInst->GetSingleWordInOperand(kTypePointerStorageClassInIdx) == + SpvStorageClassUniformConstant; +} + +bool CommonUniformElimPass::HasUnsupportedDecorates(uint32_t id) const { + analysis::UseList* uses = def_use_mgr_->GetUses(id); + if (uses == nullptr) + return false; + for (auto u : *uses) { + const SpvOp op = u.inst->opcode(); + if (IsNonTypeDecorate(op)) + return true; + } + return false; +} + +bool CommonUniformElimPass::HasOnlyNamesAndDecorates(uint32_t id) const { + analysis::UseList* uses = def_use_mgr_->GetUses(id); + if (uses == nullptr) + return true; + for (auto u : *uses) { + const SpvOp op = u.inst->opcode(); + if (op != SpvOpName && !IsNonTypeDecorate(op)) + return false; + } + return true; +} + +void CommonUniformElimPass::KillNamesAndDecorates(uint32_t id) { + // TODO(greg-lunarg): Remove id from any OpGroupDecorate and + // kill if no other operands. 
+ analysis::UseList* uses = def_use_mgr_->GetUses(id); + if (uses == nullptr) + return; + std::list killList; + for (auto u : *uses) { + const SpvOp op = u.inst->opcode(); + if (op != SpvOpName && !IsNonTypeDecorate(op)) + continue; + killList.push_back(u.inst); + } + for (auto kip : killList) + def_use_mgr_->KillInst(kip); +} + +void CommonUniformElimPass::KillNamesAndDecorates(ir::Instruction* inst) { + // TODO(greg-lunarg): Remove inst from any OpGroupDecorate and + // kill if not other operands. + const uint32_t rId = inst->result_id(); + if (rId == 0) + return; + KillNamesAndDecorates(rId); +} + +void CommonUniformElimPass::DeleteIfUseless(ir::Instruction* inst) { + const uint32_t resId = inst->result_id(); + assert(resId != 0); + if (HasOnlyNamesAndDecorates(resId)) { + KillNamesAndDecorates(resId); + def_use_mgr_->KillInst(inst); + } +} + +void CommonUniformElimPass::ReplaceAndDeleteLoad(ir::Instruction* loadInst, + uint32_t replId, + ir::Instruction* ptrInst) { + const uint32_t loadId = loadInst->result_id(); + KillNamesAndDecorates(loadId); + (void) def_use_mgr_->ReplaceAllUsesWith(loadId, replId); + // remove load instruction + def_use_mgr_->KillInst(loadInst); + // if access chain, see if it can be removed as well + if (IsNonPtrAccessChain(ptrInst->opcode())) + DeleteIfUseless(ptrInst); +} + +uint32_t CommonUniformElimPass::GetPointeeTypeId(const ir::Instruction* ptrInst) { + const uint32_t ptrTypeId = ptrInst->type_id(); + const ir::Instruction* ptrTypeInst = def_use_mgr_->GetDef(ptrTypeId); + return ptrTypeInst->GetSingleWordInOperand(kTypePointerTypeIdInIdx); +} + +void CommonUniformElimPass::GenACLoadRepl(const ir::Instruction* ptrInst, + std::vector>* newInsts, + uint32_t* resultId) { + + // Build and append Load + const uint32_t ldResultId = TakeNextId(); + const uint32_t varId = + ptrInst->GetSingleWordInOperand(kAccessChainPtrIdInIdx); + const ir::Instruction* varInst = def_use_mgr_->GetDef(varId); + assert(varInst->opcode() == SpvOpVariable); + 
const uint32_t varPteTypeId = GetPointeeTypeId(varInst); + std::vector load_in_operands; + load_in_operands.push_back( + ir::Operand(spv_operand_type_t::SPV_OPERAND_TYPE_ID, + std::initializer_list{varId})); + std::unique_ptr newLoad(new ir::Instruction(SpvOpLoad, + varPteTypeId, ldResultId, load_in_operands)); + def_use_mgr_->AnalyzeInstDefUse(&*newLoad); + newInsts->emplace_back(std::move(newLoad)); + + // Build and append Extract + const uint32_t extResultId = TakeNextId(); + const uint32_t ptrPteTypeId = GetPointeeTypeId(ptrInst); + std::vector ext_in_opnds; + ext_in_opnds.push_back( + ir::Operand(spv_operand_type_t::SPV_OPERAND_TYPE_ID, + std::initializer_list{ldResultId})); + uint32_t iidIdx = 0; + ptrInst->ForEachInId([&iidIdx, &ext_in_opnds, this](const uint32_t *iid) { + if (iidIdx > 0) { + const ir::Instruction* cInst = def_use_mgr_->GetDef(*iid); + uint32_t val = cInst->GetSingleWordInOperand(kConstantValueInIdx); + ext_in_opnds.push_back( + ir::Operand(spv_operand_type_t::SPV_OPERAND_TYPE_LITERAL_INTEGER, + std::initializer_list{val})); + } + ++iidIdx; + }); + std::unique_ptr newExt(new ir::Instruction( + SpvOpCompositeExtract, ptrPteTypeId, extResultId, ext_in_opnds)); + def_use_mgr_->AnalyzeInstDefUse(&*newExt); + newInsts->emplace_back(std::move(newExt)); + *resultId = extResultId; +} + +bool CommonUniformElimPass::IsConstantIndexAccessChain(ir::Instruction* acp) { + uint32_t inIdx = 0; + uint32_t nonConstCnt = 0; + acp->ForEachInId([&inIdx, &nonConstCnt, this](uint32_t* tid) { + if (inIdx > 0) { + ir::Instruction* opInst = def_use_mgr_->GetDef(*tid); + if (opInst->opcode() != SpvOpConstant) ++nonConstCnt; + } + ++inIdx; + }); + return nonConstCnt == 0; +} + +bool CommonUniformElimPass::UniformAccessChainConvert(ir::Function* func) { + bool modified = false; + for (auto bi = func->begin(); bi != func->end(); ++bi) { + for (auto ii = bi->begin(); ii != bi->end(); ++ii) { + if (ii->opcode() != SpvOpLoad) + continue; + uint32_t varId; + ir::Instruction* 
ptrInst = GetPtr(&*ii, &varId); + if (!IsNonPtrAccessChain(ptrInst->opcode())) + continue; + // Do not convert nested access chains + if (ptrInst->GetSingleWordInOperand(kAccessChainPtrIdInIdx) != varId) + continue; + if (!IsUniformVar(varId)) + continue; + if (!IsConstantIndexAccessChain(ptrInst)) + continue; + if (HasUnsupportedDecorates(ii->result_id())) + continue; + if (HasUnsupportedDecorates(ptrInst->result_id())) + continue; + std::vector> newInsts; + uint32_t replId; + GenACLoadRepl(ptrInst, &newInsts, &replId); + ReplaceAndDeleteLoad(&*ii, replId, ptrInst); + ++ii; + ii = ii.InsertBefore(&newInsts); + ++ii; + modified = true; + } + } + return modified; +} + +void CommonUniformElimPass::ComputeStructuredSuccessors(ir::Function* func) { + for (auto& blk : *func) { + // If header, make merge block first successor. + uint32_t cbid; + const uint32_t mbid = MergeBlockIdIfAny(blk, &cbid); + if (mbid != 0) { + block2structured_succs_[&blk].push_back(id2block_[mbid]); + if (cbid != 0) + block2structured_succs_[&blk].push_back(id2block_[cbid]); + } + // add true successors + blk.ForEachSuccessorLabel([&blk, this](uint32_t sbid) { + block2structured_succs_[&blk].push_back(id2block_[sbid]); + }); + } +} + +void CommonUniformElimPass::ComputeStructuredOrder( + ir::Function* func, std::list* order) { + // Compute structured successors and do DFS + ComputeStructuredSuccessors(func); + auto ignore_block = [](cbb_ptr) {}; + auto ignore_edge = [](cbb_ptr, cbb_ptr) {}; + auto get_structured_successors = [this](const ir::BasicBlock* block) { + return &(block2structured_succs_[block]); }; + // TODO(greg-lunarg): Get rid of const_cast by making moving const + // out of the cfa.h prototypes and into the invoking code. 
+ auto post_order = [&](cbb_ptr b) { + order->push_front(const_cast(b)); }; + + order->clear(); + spvtools::CFA::DepthFirstTraversal( + &*func->begin(), get_structured_successors, ignore_block, + post_order, ignore_edge); +} + +bool CommonUniformElimPass::CommonUniformLoadElimination(ir::Function* func) { + // Process all blocks in structured order. This is just one way (the + // simplest?) to keep track of the most recent block outside of control + // flow, used to copy common instructions, guaranteed to dominate all + // following load sites. + std::list structuredOrder; + ComputeStructuredOrder(func, &structuredOrder); + uniform2load_id_.clear(); + bool modified = false; + // Find insertion point in first block to copy non-dominating loads. + auto insertItr = func->begin()->begin(); + while (insertItr->opcode() == SpvOpVariable || + insertItr->opcode() == SpvOpNop) + ++insertItr; + uint32_t mergeBlockId = 0; + for (auto bi = structuredOrder.begin(); bi != structuredOrder.end(); ++bi) { + ir::BasicBlock* bp = *bi; + // Check if we are exiting outermost control construct. If so, remember + // new load insertion point. Trying to keep register pressure down. 
+ if (mergeBlockId == bp->id()) { + mergeBlockId = 0; + insertItr = bp->begin(); + } + for (auto ii = bp->begin(); ii != bp->end(); ++ii) { + if (ii->opcode() != SpvOpLoad) + continue; + uint32_t varId; + ir::Instruction* ptrInst = GetPtr(&*ii, &varId); + if (ptrInst->opcode() != SpvOpVariable) + continue; + if (!IsUniformVar(varId)) + continue; + if (IsSamplerOrImageVar(varId)) + continue; + if (HasUnsupportedDecorates(ii->result_id())) + continue; + uint32_t replId; + const auto uItr = uniform2load_id_.find(varId); + if (uItr != uniform2load_id_.end()) { + replId = uItr->second; + } + else { + if (mergeBlockId == 0) { + // Load is in dominating block; just remember it + uniform2load_id_[varId] = ii->result_id(); + continue; + } + else { + // Copy load into most recent dominating block and remember it + replId = TakeNextId(); + std::unique_ptr newLoad(new ir::Instruction(SpvOpLoad, + ii->type_id(), replId, {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {varId}}})); + def_use_mgr_->AnalyzeInstDefUse(&*newLoad); + insertItr = insertItr.InsertBefore(std::move(newLoad)); + ++insertItr; + uniform2load_id_[varId] = replId; + } + } + ReplaceAndDeleteLoad(&*ii, replId, ptrInst); + modified = true; + } + // If we are outside of any control construct and entering one, remember + // the id of the merge block + if (mergeBlockId == 0) { + uint32_t dummy; + mergeBlockId = MergeBlockIdIfAny(*bp, &dummy); + } + } + return modified; +} + +bool CommonUniformElimPass::CommonUniformLoadElimBlock(ir::Function* func) { + bool modified = false; + for (auto& blk : *func) { + uniform2load_id_.clear(); + for (auto ii = blk.begin(); ii != blk.end(); ++ii) { + if (ii->opcode() != SpvOpLoad) + continue; + uint32_t varId; + ir::Instruction* ptrInst = GetPtr(&*ii, &varId); + if (ptrInst->opcode() != SpvOpVariable) + continue; + if (!IsUniformVar(varId)) + continue; + if (!IsSamplerOrImageVar(varId)) + continue; + if (HasUnsupportedDecorates(ii->result_id())) + continue; + uint32_t replId; + const 
auto uItr = uniform2load_id_.find(varId); + if (uItr != uniform2load_id_.end()) { + replId = uItr->second; + } + else { + uniform2load_id_[varId] = ii->result_id(); + continue; + } + ReplaceAndDeleteLoad(&*ii, replId, ptrInst); + modified = true; + } + } + return modified; +} + +bool CommonUniformElimPass::CommonExtractElimination(ir::Function* func) { + // Find all composite ids with duplicate extracts. + for (auto bi = func->begin(); bi != func->end(); ++bi) { + for (auto ii = bi->begin(); ii != bi->end(); ++ii) { + if (ii->opcode() != SpvOpCompositeExtract) + continue; + // TODO(greg-lunarg): Support multiple indices + if (ii->NumInOperands() > 2) + continue; + if (HasUnsupportedDecorates(ii->result_id())) + continue; + uint32_t compId = ii->GetSingleWordInOperand(kExtractCompositeIdInIdx); + uint32_t idx = ii->GetSingleWordInOperand(kExtractIdx0InIdx); + comp2idx2inst_[compId][idx].push_back(&*ii); + } + } + // For all defs of ids with duplicate extracts, insert new extracts + // after def, and replace and delete old extracts + bool modified = false; + for (auto bi = func->begin(); bi != func->end(); ++bi) { + for (auto ii = bi->begin(); ii != bi->end(); ++ii) { + const auto cItr = comp2idx2inst_.find(ii->result_id()); + if (cItr == comp2idx2inst_.end()) + continue; + for (auto idxItr : cItr->second) { + if (idxItr.second.size() < 2) + continue; + uint32_t replId = TakeNextId(); + std::unique_ptr newExtract(new ir::Instruction(*idxItr.second.front())); + newExtract->SetResultId(replId); + def_use_mgr_->AnalyzeInstDefUse(&*newExtract); + ++ii; + ii = ii.InsertBefore(std::move(newExtract)); + for (auto instItr : idxItr.second) { + uint32_t resId = instItr->result_id(); + KillNamesAndDecorates(resId); + (void)def_use_mgr_->ReplaceAllUsesWith(resId, replId); + def_use_mgr_->KillInst(instItr); + } + modified = true; + } + } + } + return modified; +} + +bool CommonUniformElimPass::EliminateCommonUniform(ir::Function* func) { + bool modified = false; + modified |= 
UniformAccessChainConvert(func); + modified |= CommonUniformLoadElimination(func); + modified |= CommonExtractElimination(func); + + modified |= CommonUniformLoadElimBlock(func); + return modified; +} + +void CommonUniformElimPass::Initialize(ir::Module* module) { + + module_ = module; + + // Initialize function and block maps + id2function_.clear(); + id2block_.clear(); + for (auto& fn : *module_) { + id2function_[fn.result_id()] = &fn; + for (auto& blk : fn) + id2block_[blk.id()] = &blk; + } + + // Clear collections + block2structured_succs_.clear(); + label2preds_.clear(); + comp2idx2inst_.clear(); + + // TODO(greg-lunarg): Use def/use from previous pass + def_use_mgr_.reset(new analysis::DefUseManager(consumer(), module_)); + + // Initialize next unused Id. + next_id_ = module->id_bound(); + + // Initialize extension whitelist + InitExtensions(); +}; + + +bool CommonUniformElimPass::AllExtensionsSupported() const { + // If any extension not in whitelist, return false + for (auto& ei : module_->extensions()) { + const char* extName = reinterpret_cast( + &ei.GetInOperand(0).words[0]); + if (extensions_whitelist_.find(extName) == extensions_whitelist_.end()) + return false; + } + return true; +} + +Pass::Status CommonUniformElimPass::ProcessImpl() { + // Assumes all control flow structured. + // TODO(greg-lunarg): Do SSA rewrite for non-structured control flow + if (!module_->HasCapability(SpvCapabilityShader)) + return Status::SuccessWithoutChange; + // Assumes logical addressing only + // TODO(greg-lunarg): Add support for physical addressing + if (module_->HasCapability(SpvCapabilityAddresses)) + return Status::SuccessWithoutChange; + // Do not process if any disallowed extensions are enabled + if (!AllExtensionsSupported()) + return Status::SuccessWithoutChange; + // Do not process if module contains OpGroupDecorate. Additional + // support required in KillNamesAndDecorates(). 
+ // TODO(greg-lunarg): Add support for OpGroupDecorate + for (auto& ai : module_->annotations()) + if (ai.opcode() == SpvOpGroupDecorate) + return Status::SuccessWithoutChange; + // If non-32-bit integer type in module, terminate processing + // TODO(): Handle non-32-bit integer constants in access chains + for (const ir::Instruction& inst : module_->types_values()) + if (inst.opcode() == SpvOpTypeInt && + inst.GetSingleWordInOperand(kTypeIntWidthInIdx) != 32) + return Status::SuccessWithoutChange; + // Process entry point functions + bool modified = false; + for (auto& e : module_->entry_points()) { + ir::Function* fn = + id2function_[e.GetSingleWordInOperand(kEntryPointFunctionIdInIdx)]; + modified = EliminateCommonUniform(fn) || modified; + } + FinalizeNextId(module_); + return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; +} + +CommonUniformElimPass::CommonUniformElimPass() + : module_(nullptr), def_use_mgr_(nullptr), next_id_(0) {} + +Pass::Status CommonUniformElimPass::Process(ir::Module* module) { + Initialize(module); + return ProcessImpl(); +} + +void CommonUniformElimPass::InitExtensions() { + extensions_whitelist_.clear(); + extensions_whitelist_.insert({ + "SPV_AMD_shader_explicit_vertex_parameter", + "SPV_AMD_shader_trinary_minmax", + "SPV_AMD_gcn_shader", + "SPV_KHR_shader_ballot", + "SPV_AMD_shader_ballot", + "SPV_AMD_gpu_shader_half_float", + "SPV_KHR_shader_draw_parameters", + "SPV_KHR_subgroup_vote", + "SPV_KHR_16bit_storage", + "SPV_KHR_device_group", + "SPV_KHR_multiview", + "SPV_NVX_multiview_per_view_attributes", + "SPV_NV_viewport_array2", + "SPV_NV_stereo_view_rendering", + "SPV_NV_sample_mask_override_coverage", + "SPV_NV_geometry_shader_passthrough", + "SPV_AMD_texture_gather_bias_lod", + "SPV_KHR_storage_buffer_storage_class", + // SPV_KHR_variable_pointers + // Currently do not support extended pointer expressions + "SPV_AMD_gpu_shader_int16", + "SPV_KHR_post_depth_coverage", + "SPV_KHR_shader_atomic_counter_ops", 
+ }); +} + +} // namespace opt +} // namespace spvtools + diff --git a/source/opt/common_uniform_elim_pass.h b/source/opt/common_uniform_elim_pass.h new file mode 100644 index 00000000..a02ed283 --- /dev/null +++ b/source/opt/common_uniform_elim_pass.h @@ -0,0 +1,214 @@ +// Copyright (c) 2016 The Khronos Group Inc. +// Copyright (c) 2016 Valve Corporation +// Copyright (c) 2016 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIBSPIRV_OPT_COMMON_UNIFORM_ELIM_PASS_H_ +#define LIBSPIRV_OPT_COMMON_UNIFORM_ELIM_PASS_H_ + +#include +#include +#include +#include +#include +#include + +#include "def_use_manager.h" +#include "module.h" +#include "basic_block.h" +#include "pass.h" + +namespace spvtools { +namespace opt { + +// See optimizer.hpp for documentation. +class CommonUniformElimPass : public Pass { + using cbb_ptr = const ir::BasicBlock*; + + public: + using GetBlocksFunction = + std::function*(const ir::BasicBlock*)>; + + CommonUniformElimPass(); + const char* name() const override { return "common-uniform-elim"; } + Status Process(ir::Module*) override; + + private: + // Returns true if |opcode| is a non-ptr access chain op + bool IsNonPtrAccessChain(const SpvOp opcode) const; + + // Returns true if |typeInst| is a sampler or image type or a struct + // containing one, recursively. + bool IsSamplerOrImageType(const ir::Instruction* typeInst) const; + + // Returns true if |varId| is a variable containing a sampler or image. 
+ bool IsSamplerOrImageVar(uint32_t varId) const; + + // Return true if |block_ptr| is loop header block + bool IsLoopHeader(ir::BasicBlock* block_ptr); + + // Given a load or store pointed at by |ip|, return the pointer + // instruction. Also return the variable's id in |varId|. + ir::Instruction* GetPtr(ir::Instruction* ip, uint32_t* varId); + + // Return true if variable is uniform + bool IsUniformVar(uint32_t varId); + + // Return true if any uses of |id| are decorate ops. + bool HasUnsupportedDecorates(uint32_t id) const; + + // Return true if all uses of |id| are only name or decorate ops. + bool HasOnlyNamesAndDecorates(uint32_t id) const; + + // Kill all name and decorate ops using |inst| + void KillNamesAndDecorates(ir::Instruction* inst); + + // Kill all name and decorate ops using |id| + void KillNamesAndDecorates(uint32_t id); + + // Delete inst if it has no uses. Assumes inst has a resultId. + void DeleteIfUseless(ir::Instruction* inst); + + // Replace all instances of load's id with replId and delete load + // and its access chain, if any + void ReplaceAndDeleteLoad(ir::Instruction* loadInst, + uint32_t replId, + ir::Instruction* ptrInst); + + // Return type id for pointer's pointee + uint32_t GetPointeeTypeId(const ir::Instruction* ptrInst); + + // For the (constant index) access chain ptrInst, create an + // equivalent load and extract + void GenACLoadRepl(const ir::Instruction* ptrInst, + std::vector>* newInsts, + uint32_t* resultId); + + // Return true if all indices are constant + bool IsConstantIndexAccessChain(ir::Instruction* acp); + + // Convert all uniform access chain loads into load/extract. + bool UniformAccessChainConvert(ir::Function* func); + + // Returns the id of the merge block declared by a merge instruction in + // this block, if any. If none, returns zero. + uint32_t MergeBlockIdIfAny(const ir::BasicBlock& blk, uint32_t* cbid); + + // Compute structured successors for function |func|. 
+ // A block's structured successors are the blocks it branches to + // together with its declared merge block if it has one. + // When order matters, the merge block always appears first. + // This assures correct depth first search in the presence of early + // returns and kills. If the successor vector contain duplicates + // if the merge block, they are safely ignored by DFS. + void ComputeStructuredSuccessors(ir::Function* func); + + // Compute structured block order for |func| into |structuredOrder|. This + // order has the property that dominators come before all blocks they + // dominate and merge blocks come after all blocks that are in the control + // constructs of their header. + void ComputeStructuredOrder(ir::Function* func, + std::list* order); + + // Eliminate loads of uniform variables which have previously been loaded. + // If first load is in control flow, move it to first block of function. + // Most effective if preceded by UniformAccessChainRemoval(). + bool CommonUniformLoadElimination(ir::Function* func); + + // Eliminate loads of uniform sampler and image variables which have previously + // been loaded in the same block for types whose loads cannot cross blocks. + bool CommonUniformLoadElimBlock(ir::Function* func); + + // Eliminate duplicated extracts of same id. Extract may be moved to same + // block as the id definition. This is primarily intended for extracts + // from uniform loads. Most effective if preceded by + // CommonUniformLoadElimination(). + bool CommonExtractElimination(ir::Function* func); + + // For function |func|, first change all uniform constant index + // access chain loads into equivalent composite extracts. Then consolidate + // identical uniform loads into one uniform load. Finally, consolidate + // identical uniform extracts into one uniform extract. This may require + // moving a load or extract to a point which dominates all uses. + // Return true if func is modified. 
+ // + // This pass requires the function to have structured control flow ie shader + // capability. It also requires logical addressing ie Addresses capability + // is not enabled. It also currently does not support any extensions. + // + // This function currently only optimizes loads with a single index. + bool EliminateCommonUniform(ir::Function* func); + + // Initialize extensions whitelist + void InitExtensions(); + + // Return true if all extensions in this module are allowed by this pass. + bool AllExtensionsSupported() const; + + // Return true if |op| is a decorate for non-type instruction + inline bool IsNonTypeDecorate(uint32_t op) const { + return (op == SpvOpDecorate || op == SpvOpDecorateId); + } + + inline void FinalizeNextId(ir::Module* module) { + module->SetIdBound(next_id_); + } + + inline uint32_t TakeNextId() { + return next_id_++; + } + + void Initialize(ir::Module* module); + Pass::Status ProcessImpl(); + + // Module this pass is processing + ir::Module* module_; + + // Def-Uses for the module we are processing + std::unique_ptr def_use_mgr_; + + // Map from function's result id to function + std::unordered_map id2function_; + + // Map from block's label id to block. + std::unordered_map id2block_; + + // Map from block to its structured successor blocks. See + // ComputeStructuredSuccessors() for definition. + std::unordered_map> + block2structured_succs_; + + // Map from block's label id to its predecessor blocks ids + std::unordered_map> label2preds_; + + // Map from uniform variable id to its common load id + std::unordered_map uniform2load_id_; + + // Map of extract composite ids to map of indices to insts + // TODO(greg-lunarg): Consider std::vector. + std::unordered_map>> comp2idx2inst_; + + // Extensions supported by this pass. 
+  std::unordered_set<std::string> extensions_whitelist_;
+
+  // Next unused ID
+  uint32_t next_id_;
+};
+
+}  // namespace opt
+}  // namespace spvtools
+
+#endif  // LIBSPIRV_OPT_SSAMEM_PASS_H_
+
diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp
index 80d86aeb..62e5be51 100644
--- a/source/opt/optimizer.cpp
+++ b/source/opt/optimizer.cpp
@@ -176,6 +176,11 @@ Optimizer::PassToken CreateAggressiveDCEPass() {
       MakeUnique<opt::AggressiveDCEPass>());
 }
 
+Optimizer::PassToken CreateCommonUniformElimPass() {
+  return MakeUnique<Optimizer::PassToken::Impl>(
+      MakeUnique<opt::CommonUniformElimPass>());
+}
+
 Optimizer::PassToken CreateCompactIdsPass() {
   return MakeUnique<Optimizer::PassToken::Impl>(
       MakeUnique<opt::CompactIdsPass>());
diff --git a/source/opt/passes.h b/source/opt/passes.h
index f6d69619..9e6ad72c 100644
--- a/source/opt/passes.h
+++ b/source/opt/passes.h
@@ -18,6 +18,7 @@
 // A single header to include all passes.
 
 #include "block_merge_pass.h"
+#include "common_uniform_elim_pass.h"
 #include "compact_ids_pass.h"
 #include "dead_branch_elim_pass.h"
 #include "eliminate_dead_constant_pass.h"
diff --git a/test/opt/CMakeLists.txt b/test/opt/CMakeLists.txt
index 75583270..0aa94c42 100644
--- a/test/opt/CMakeLists.txt
+++ b/test/opt/CMakeLists.txt
@@ -98,6 +98,11 @@ add_spvtools_unittest(TARGET pass_aggressive_dce
   LIBS SPIRV-Tools-opt
 )
 
+add_spvtools_unittest(TARGET pass_common_uniform_elim
+  SRCS common_uniform_elim_test.cpp pass_utils.cpp
+  LIBS SPIRV-Tools-opt
+)
+
 add_spvtools_unittest(TARGET pass_eliminate_dead_const
   SRCS eliminate_dead_const_test.cpp pass_utils.cpp
   LIBS SPIRV-Tools-opt
diff --git a/test/opt/common_uniform_elim_test.cpp b/test/opt/common_uniform_elim_test.cpp
new file mode 100644
index 00000000..61917e82
--- /dev/null
+++ b/test/opt/common_uniform_elim_test.cpp
@@ -0,0 +1,673 @@
+// Copyright (c) 2017 Valve Corporation
+// Copyright (c) 2017 LunarG Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "pass_fixture.h" +#include "pass_utils.h" + +namespace { + +using namespace spvtools; + +using CommonUniformElimTest = PassTest<::testing::Test>; + +TEST_F(CommonUniformElimTest, Basic1) { + // Note: This test exemplifies the following: + // - Common uniform (%_) load floated to nearest non-controlled block + // - Common extract (g_F) floated to non-controlled block + // - Non-common extract (g_F2) not floated, but common uniform load shared + // + // #version 140 + // in vec4 BaseColor; + // in float fi; + // + // layout(std140) uniform U_t + // { + // float g_F; + // float g_F2; + // } ; + // + // void main() + // { + // vec4 v = BaseColor; + // if (fi > 0) { + // v = v * g_F; + // } + // else { + // float f2 = g_F2 - g_F; + // v = v * f2; + // } + // gl_FragColor = v; + // } + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BaseColor %fi %gl_FragColor +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %v "v" +OpName %BaseColor "BaseColor" +OpName %fi "fi" +OpName %U_t "U_t" +OpMemberName %U_t 0 "g_F" +OpMemberName %U_t 1 "g_F2" +OpName %_ "" +OpName %f2 "f2" +OpName %gl_FragColor "gl_FragColor" +OpMemberDecorate %U_t 0 Offset 0 +OpMemberDecorate %U_t 1 Offset 4 +OpDecorate %U_t Block +OpDecorate %_ DescriptorSet 0 +%void = OpTypeVoid +%11 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float 
+%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +%_ptr_Input_float = OpTypePointer Input %float +%fi = OpVariable %_ptr_Input_float Input +%float_0 = OpConstant %float 0 +%bool = OpTypeBool +%U_t = OpTypeStruct %float %float +%_ptr_Uniform_U_t = OpTypePointer Uniform %U_t +%_ = OpVariable %_ptr_Uniform_U_t Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Function_float = OpTypePointer Function %float +%int_1 = OpConstant %int 1 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%gl_FragColor = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string before = + R"(%main = OpFunction %void None %11 +%26 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%f2 = OpVariable %_ptr_Function_float Function +%27 = OpLoad %v4float %BaseColor +OpStore %v %27 +%28 = OpLoad %float %fi +%29 = OpFOrdGreaterThan %bool %28 %float_0 +OpSelectionMerge %30 None +OpBranchConditional %29 %31 %32 +%31 = OpLabel +%33 = OpLoad %v4float %v +%34 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%35 = OpLoad %float %34 +%36 = OpVectorTimesScalar %v4float %33 %35 +OpStore %v %36 +OpBranch %30 +%32 = OpLabel +%37 = OpAccessChain %_ptr_Uniform_float %_ %int_1 +%38 = OpLoad %float %37 +%39 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%40 = OpLoad %float %39 +%41 = OpFSub %float %38 %40 +OpStore %f2 %41 +%42 = OpLoad %v4float %v +%43 = OpLoad %float %f2 +%44 = OpVectorTimesScalar %v4float %42 %43 +OpStore %v %44 +OpBranch %30 +%30 = OpLabel +%45 = OpLoad %v4float %v +OpStore %gl_FragColor %45 +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%main = OpFunction %void None %11 +%26 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%f2 = OpVariable %_ptr_Function_float Function +%52 = OpLoad %U_t %_ +%53 = OpCompositeExtract %float %52 0 +%27 = OpLoad %v4float %BaseColor +OpStore %v %27 +%28 = OpLoad %float %fi +%29 = 
OpFOrdGreaterThan %bool %28 %float_0 +OpSelectionMerge %30 None +OpBranchConditional %29 %31 %32 +%31 = OpLabel +%33 = OpLoad %v4float %v +%36 = OpVectorTimesScalar %v4float %33 %53 +OpStore %v %36 +OpBranch %30 +%32 = OpLabel +%49 = OpCompositeExtract %float %52 1 +%41 = OpFSub %float %49 %53 +OpStore %f2 %41 +%42 = OpLoad %v4float %v +%43 = OpLoad %float %f2 +%44 = OpVectorTimesScalar %v4float %42 %43 +OpStore %v %44 +OpBranch %30 +%30 = OpLabel +%45 = OpLoad %v4float %v +OpStore %gl_FragColor %45 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + predefs + before, predefs + after, true, true); +} + +TEST_F(CommonUniformElimTest, Basic2) { + // Note: This test exemplifies the following: + // - Common uniform (%_) load floated to nearest non-controlled block + // - Common extract (g_F) floated to non-controlled block + // - Non-common extract (g_F2) not floated, but common uniform load shared + // + // #version 140 + // in vec4 BaseColor; + // in float fi; + // in float fi2; + // + // layout(std140) uniform U_t + // { + // float g_F; + // float g_F2; + // } ; + // + // void main() + // { + // float f = fi; + // if (f < 0) + // f = -f; + // if (fi2 > 0) { + // f = f * g_F; + // } + // else { + // f = g_F2 - g_F; + // } + // gl_FragColor = f * BaseColor; + // } + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %fi %fi2 %gl_FragColor %BaseColor +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %f "f" +OpName %fi "fi" +OpName %fi2 "fi2" +OpName %U_t "U_t" +OpMemberName %U_t 0 "g_F" +OpMemberName %U_t 1 "g_F2" +OpName %_ "" +OpName %gl_FragColor "gl_FragColor" +OpName %BaseColor "BaseColor" +OpMemberDecorate %U_t 0 Offset 0 +OpMemberDecorate %U_t 1 Offset 4 +OpDecorate %U_t Block +OpDecorate %_ DescriptorSet 0 +%void = OpTypeVoid +%11 = OpTypeFunction %void +%float = OpTypeFloat 32 +%_ptr_Function_float = 
OpTypePointer Function %float +%_ptr_Input_float = OpTypePointer Input %float +%fi = OpVariable %_ptr_Input_float Input +%float_0 = OpConstant %float 0 +%bool = OpTypeBool +%fi2 = OpVariable %_ptr_Input_float Input +%U_t = OpTypeStruct %float %float +%_ptr_Uniform_U_t = OpTypePointer Uniform %U_t +%_ = OpVariable %_ptr_Uniform_U_t Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%int_1 = OpConstant %int 1 +%v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%gl_FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +)"; + + const std::string before = + R"(%main = OpFunction %void None %11 +%25 = OpLabel +%f = OpVariable %_ptr_Function_float Function +%26 = OpLoad %float %fi +OpStore %f %26 +%27 = OpLoad %float %f +%28 = OpFOrdLessThan %bool %27 %float_0 +OpSelectionMerge %29 None +OpBranchConditional %28 %30 %29 +%30 = OpLabel +%31 = OpLoad %float %f +%32 = OpFNegate %float %31 +OpStore %f %32 +OpBranch %29 +%29 = OpLabel +%33 = OpLoad %float %fi2 +%34 = OpFOrdGreaterThan %bool %33 %float_0 +OpSelectionMerge %35 None +OpBranchConditional %34 %36 %37 +%36 = OpLabel +%38 = OpLoad %float %f +%39 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%40 = OpLoad %float %39 +%41 = OpFMul %float %38 %40 +OpStore %f %41 +OpBranch %35 +%37 = OpLabel +%42 = OpAccessChain %_ptr_Uniform_float %_ %int_1 +%43 = OpLoad %float %42 +%44 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%45 = OpLoad %float %44 +%46 = OpFSub %float %43 %45 +OpStore %f %46 +OpBranch %35 +%35 = OpLabel +%47 = OpLoad %v4float %BaseColor +%48 = OpLoad %float %f +%49 = OpVectorTimesScalar %v4float %47 %48 +OpStore %gl_FragColor %49 +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%main = OpFunction %void None %11 +%25 = OpLabel +%f = OpVariable %_ptr_Function_float Function +%26 = OpLoad %float %fi 
+OpStore %f %26 +%27 = OpLoad %float %f +%28 = OpFOrdLessThan %bool %27 %float_0 +OpSelectionMerge %29 None +OpBranchConditional %28 %30 %29 +%30 = OpLabel +%31 = OpLoad %float %f +%32 = OpFNegate %float %31 +OpStore %f %32 +OpBranch %29 +%29 = OpLabel +%56 = OpLoad %U_t %_ +%57 = OpCompositeExtract %float %56 0 +%33 = OpLoad %float %fi2 +%34 = OpFOrdGreaterThan %bool %33 %float_0 +OpSelectionMerge %35 None +OpBranchConditional %34 %36 %37 +%36 = OpLabel +%38 = OpLoad %float %f +%41 = OpFMul %float %38 %57 +OpStore %f %41 +OpBranch %35 +%37 = OpLabel +%53 = OpCompositeExtract %float %56 1 +%46 = OpFSub %float %53 %57 +OpStore %f %46 +OpBranch %35 +%35 = OpLabel +%47 = OpLoad %v4float %BaseColor +%48 = OpLoad %float %f +%49 = OpVectorTimesScalar %v4float %47 %48 +OpStore %gl_FragColor %49 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + predefs + before, predefs + after, true, true); +} + +TEST_F(CommonUniformElimTest, Basic3) { + // Note: This test exemplifies the following: + // - Existing common uniform (%_) load kept in place and shared + // + // #version 140 + // in vec4 BaseColor; + // in float fi; + // + // layout(std140) uniform U_t + // { + // bool g_B; + // float g_F; + // } ; + // + // void main() + // { + // vec4 v = BaseColor; + // if (g_B) + // v = v * g_F; + // gl_FragColor = v; + // } + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor %fi +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %v "v" +OpName %BaseColor "BaseColor" +OpName %U_t "U_t" +OpMemberName %U_t 0 "g_B" +OpMemberName %U_t 1 "g_F" +OpName %_ "" +OpName %gl_FragColor "gl_FragColor" +OpName %fi "fi" +OpMemberDecorate %U_t 0 Offset 0 +OpMemberDecorate %U_t 1 Offset 4 +OpDecorate %U_t Block +OpDecorate %_ DescriptorSet 0 +%void = OpTypeVoid +%10 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = 
OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +%uint = OpTypeInt 32 0 +%U_t = OpTypeStruct %uint %float +%_ptr_Uniform_U_t = OpTypePointer Uniform %U_t +%_ = OpVariable %_ptr_Uniform_U_t Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%bool = OpTypeBool +%uint_0 = OpConstant %uint 0 +%int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%gl_FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Input_float = OpTypePointer Input %float +%fi = OpVariable %_ptr_Input_float Input +)"; + + const std::string before = + R"(%main = OpFunction %void None %10 +%26 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%27 = OpLoad %v4float %BaseColor +OpStore %v %27 +%28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 +%29 = OpLoad %uint %28 +%30 = OpINotEqual %bool %29 %uint_0 +OpSelectionMerge %31 None +OpBranchConditional %30 %32 %31 +%32 = OpLabel +%33 = OpLoad %v4float %v +%34 = OpAccessChain %_ptr_Uniform_float %_ %int_1 +%35 = OpLoad %float %34 +%36 = OpVectorTimesScalar %v4float %33 %35 +OpStore %v %36 +OpBranch %31 +%31 = OpLabel +%37 = OpLoad %v4float %v +OpStore %gl_FragColor %37 +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%main = OpFunction %void None %10 +%26 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%27 = OpLoad %v4float %BaseColor +OpStore %v %27 +%38 = OpLoad %U_t %_ +%39 = OpCompositeExtract %uint %38 0 +%30 = OpINotEqual %bool %39 %uint_0 +OpSelectionMerge %31 None +OpBranchConditional %30 %32 %31 +%32 = OpLabel +%33 = OpLoad %v4float %v +%41 = OpCompositeExtract %float %38 1 +%36 = OpVectorTimesScalar %v4float %33 %41 +OpStore %v %36 +OpBranch %31 +%31 = OpLabel +%37 = OpLoad %v4float %v +OpStore %gl_FragColor %37 +OpReturn +OpFunctionEnd 
+)"; + + SinglePassRunAndCheck( + predefs + before, predefs + after, true, true); +} + +TEST_F(CommonUniformElimTest, Loop) { + // Note: This test exemplifies the following: + // - Common extract (g_F) shared between two loops + // #version 140 + // in vec4 BC; + // in vec4 BC2; + // + // layout(std140) uniform U_t + // { + // float g_F; + // } ; + // + // void main() + // { + // vec4 v = BC; + // for (int i = 0; i < 4; i++) + // v[i] = v[i] / g_F; + // vec4 v2 = BC2; + // for (int i = 0; i < 4; i++) + // v2[i] = v2[i] * g_F; + // gl_FragColor = v + v2; + // } + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BC %BC2 %gl_FragColor +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %v "v" +OpName %BC "BC" +OpName %i "i" +OpName %U_t "U_t" +OpMemberName %U_t 0 "g_F" +OpName %_ "" +OpName %v2 "v2" +OpName %BC2 "BC2" +OpName %i_0 "i" +OpName %gl_FragColor "gl_FragColor" +OpMemberDecorate %U_t 0 Offset 0 +OpDecorate %U_t Block +OpDecorate %_ DescriptorSet 0 +%void = OpTypeVoid +%13 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BC = OpVariable %_ptr_Input_v4float Input +%int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%int_0 = OpConstant %int 0 +%int_4 = OpConstant %int 4 +%bool = OpTypeBool +%_ptr_Function_float = OpTypePointer Function %float +%U_t = OpTypeStruct %float +%_ptr_Uniform_U_t = OpTypePointer Uniform %U_t +%_ = OpVariable %_ptr_Uniform_U_t Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%int_1 = OpConstant %int 1 +%BC2 = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%gl_FragColor = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string before = + R"(%main = OpFunction %void 
None %13 +%28 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%i = OpVariable %_ptr_Function_int Function +%v2 = OpVariable %_ptr_Function_v4float Function +%i_0 = OpVariable %_ptr_Function_int Function +%29 = OpLoad %v4float %BC +OpStore %v %29 +OpStore %i %int_0 +OpBranch %30 +%30 = OpLabel +OpLoopMerge %31 %32 None +OpBranch %33 +%33 = OpLabel +%34 = OpLoad %int %i +%35 = OpSLessThan %bool %34 %int_4 +OpBranchConditional %35 %36 %31 +%36 = OpLabel +%37 = OpLoad %int %i +%38 = OpLoad %int %i +%39 = OpAccessChain %_ptr_Function_float %v %38 +%40 = OpLoad %float %39 +%41 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%42 = OpLoad %float %41 +%43 = OpFDiv %float %40 %42 +%44 = OpAccessChain %_ptr_Function_float %v %37 +OpStore %44 %43 +OpBranch %32 +%32 = OpLabel +%45 = OpLoad %int %i +%46 = OpIAdd %int %45 %int_1 +OpStore %i %46 +OpBranch %30 +%31 = OpLabel +%47 = OpLoad %v4float %BC2 +OpStore %v2 %47 +OpStore %i_0 %int_0 +OpBranch %48 +%48 = OpLabel +OpLoopMerge %49 %50 None +OpBranch %51 +%51 = OpLabel +%52 = OpLoad %int %i_0 +%53 = OpSLessThan %bool %52 %int_4 +OpBranchConditional %53 %54 %49 +%54 = OpLabel +%55 = OpLoad %int %i_0 +%56 = OpLoad %int %i_0 +%57 = OpAccessChain %_ptr_Function_float %v2 %56 +%58 = OpLoad %float %57 +%59 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%60 = OpLoad %float %59 +%61 = OpFMul %float %58 %60 +%62 = OpAccessChain %_ptr_Function_float %v2 %55 +OpStore %62 %61 +OpBranch %50 +%50 = OpLabel +%63 = OpLoad %int %i_0 +%64 = OpIAdd %int %63 %int_1 +OpStore %i_0 %64 +OpBranch %48 +%49 = OpLabel +%65 = OpLoad %v4float %v +%66 = OpLoad %v4float %v2 +%67 = OpFAdd %v4float %65 %66 +OpStore %gl_FragColor %67 +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%main = OpFunction %void None %13 +%28 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%i = OpVariable %_ptr_Function_int Function +%v2 = OpVariable %_ptr_Function_v4float Function +%i_0 = OpVariable %_ptr_Function_int Function +%72 = OpLoad %U_t 
%_ +%73 = OpCompositeExtract %float %72 0 +%29 = OpLoad %v4float %BC +OpStore %v %29 +OpStore %i %int_0 +OpBranch %30 +%30 = OpLabel +OpLoopMerge %31 %32 None +OpBranch %33 +%33 = OpLabel +%34 = OpLoad %int %i +%35 = OpSLessThan %bool %34 %int_4 +OpBranchConditional %35 %36 %31 +%36 = OpLabel +%37 = OpLoad %int %i +%38 = OpLoad %int %i +%39 = OpAccessChain %_ptr_Function_float %v %38 +%40 = OpLoad %float %39 +%43 = OpFDiv %float %40 %73 +%44 = OpAccessChain %_ptr_Function_float %v %37 +OpStore %44 %43 +OpBranch %32 +%32 = OpLabel +%45 = OpLoad %int %i +%46 = OpIAdd %int %45 %int_1 +OpStore %i %46 +OpBranch %30 +%31 = OpLabel +%47 = OpLoad %v4float %BC2 +OpStore %v2 %47 +OpStore %i_0 %int_0 +OpBranch %48 +%48 = OpLabel +OpLoopMerge %49 %50 None +OpBranch %51 +%51 = OpLabel +%52 = OpLoad %int %i_0 +%53 = OpSLessThan %bool %52 %int_4 +OpBranchConditional %53 %54 %49 +%54 = OpLabel +%55 = OpLoad %int %i_0 +%56 = OpLoad %int %i_0 +%57 = OpAccessChain %_ptr_Function_float %v2 %56 +%58 = OpLoad %float %57 +%61 = OpFMul %float %58 %73 +%62 = OpAccessChain %_ptr_Function_float %v2 %55 +OpStore %62 %61 +OpBranch %50 +%50 = OpLabel +%63 = OpLoad %int %i_0 +%64 = OpIAdd %int %63 %int_1 +OpStore %i_0 %64 +OpBranch %48 +%49 = OpLabel +%65 = OpLoad %v4float %v +%66 = OpLoad %v4float %v2 +%67 = OpFAdd %v4float %65 %66 +OpStore %gl_FragColor %67 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + predefs + before, predefs + after, true, true); +} + +// TODO(greg-lunarg): Add tests to verify handling of these cases: +// +// Disqualifying cases: extensions, decorations, non-logical addressing, +// non-structured control flow +// Others? + +} // anonymous namespace diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 8b39af98..3b397494 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -76,6 +76,12 @@ Options: equivalent load/stores with inserts and extracts. Performed on function scope variables referenced only with load, store, and constant index access chains. 
+ --eliminate-common-uniform + Perform load/load elimination for duplicate uniform values. + Converts any constant index access chain uniform loads into + its equivalent load and extract. Some loads will be moved + to facilitate sharing. Performed only on entry point + functions. --eliminate-local-single-block Perform single-block store/load and load/load elimination. Performed only on function scope variables in entry point @@ -182,6 +188,8 @@ int main(int argc, char** argv) { optimizer.RegisterPass(CreateDeadBranchElimPass()); } else if (0 == strcmp(cur_arg, "--eliminate-local-multi-store")) { optimizer.RegisterPass(CreateLocalMultiStoreElimPass()); + } else if (0 == strcmp(cur_arg, "--eliminate-common-uniform")) { + optimizer.RegisterPass(CreateCommonUniformElimPass()); } else if (0 == strcmp(cur_arg, "--eliminate-dead-const")) { optimizer.RegisterPass(CreateEliminateDeadConstantPass()); } else if (0 == strcmp(cur_arg, "--fold-spec-const-op-composite")) { -- 2.34.1