From 7c8da66bc27cc5c4ccb6a0fa612f56c9417518ff Mon Sep 17 00:00:00 2001 From: GregF Date: Thu, 18 May 2017 14:51:55 -0600 Subject: [PATCH] mem2reg: Add pass to eliminate local loads and stores in single block. --- include/spirv-tools/optimizer.hpp | 20 ++ source/opt/CMakeLists.txt | 2 + source/opt/local_single_block_elim_pass.cpp | 344 ++++++++++++++++++++ source/opt/local_single_block_elim_pass.h | 153 +++++++++ source/opt/optimizer.cpp | 7 +- source/opt/passes.h | 1 + test/opt/CMakeLists.txt | 5 + test/opt/local_single_block_elim.cpp | 469 ++++++++++++++++++++++++++++ tools/opt/opt.cpp | 2 + 9 files changed, 1002 insertions(+), 1 deletion(-) create mode 100644 source/opt/local_single_block_elim_pass.cpp create mode 100644 source/opt/local_single_block_elim_pass.h create mode 100644 test/opt/local_single_block_elim.cpp diff --git a/include/spirv-tools/optimizer.hpp b/include/spirv-tools/optimizer.hpp index 6695573..099a944 100644 --- a/include/spirv-tools/optimizer.hpp +++ b/include/spirv-tools/optimizer.hpp @@ -193,6 +193,26 @@ Optimizer::PassToken CreateEliminateDeadConstantPass(); // size or runtime performance. Functions that are not designated as entry // points are not changed. Optimizer::PassToken CreateInlinePass(); + +// Creates a single-block local variable load/store elimination pass. +// For every entry point function, do single block memory optimization of +// function variables referenced only with non-access-chain loads and stores. +// For each targeted variable load, if previous store to that variable in the +// block, replace the load's result id with the value id of the store. +// If previous load within the block, replace the current load's result id +// with the previous load's result id. In either case, delete the current +// load. Finally, check if any remaining stores are useless, and delete store +// and variable if possible. +// +// The presence of access chain references and function calls can inhibit +// the above optimization. 
+// +// Only modules with logical addressing are currently processed. +// +// This pass is most effective if preceded by Inlining and +// LocalAccessChainConvert. This pass will reduce the work needed to be done +// by LocalSingleStoreElim and LocalSSARewrite. +Optimizer::PassToken CreateLocalSingleBlockLoadStoreElimPass(); // Creates a local access chain conversion pass. // A local access chain conversion pass identifies all function scope diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt index 2a9a61b..64fdc02 100644 --- a/source/opt/CMakeLists.txt +++ b/source/opt/CMakeLists.txt @@ -26,6 +26,7 @@ add_library(SPIRV-Tools-opt instruction.h ir_loader.h local_access_chain_convert_pass.h + local_single_block_elim_pass.h log.h module.h null_pass.h @@ -52,6 +53,7 @@ add_library(SPIRV-Tools-opt instruction.cpp ir_loader.cpp local_access_chain_convert_pass.cpp + local_single_block_elim_pass.cpp module.cpp set_spec_constant_default_value_pass.cpp optimizer.cpp diff --git a/source/opt/local_single_block_elim_pass.cpp b/source/opt/local_single_block_elim_pass.cpp new file mode 100644 index 0000000..b18b08d --- /dev/null +++ b/source/opt/local_single_block_elim_pass.cpp @@ -0,0 +1,344 @@ +// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2017 Valve Corporation +// Copyright (c) 2017 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "iterator.h"
+#include "local_single_block_elim_pass.h"
+
+#include <cassert>
+#include <queue>
+#include <vector>
+
+// Operand / in-operand indices of the SPIR-V instructions inspected below.
+static const int kSpvEntryPointFunctionId = 1;
+static const int kSpvStorePtrId = 0;
+static const int kSpvStoreValId = 1;
+static const int kSpvLoadPtrId = 0;
+static const int kSpvAccessChainPtrId = 0;
+static const int kSpvTypePointerStorageClass = 0;
+static const int kSpvTypePointerTypeId = 1;
+
+namespace spvtools {
+namespace opt {
+
+bool LocalSingleBlockLoadStoreElimPass::IsNonPtrAccessChain(
+    const SpvOp opcode) const {
+  return opcode == SpvOpAccessChain || opcode == SpvOpInBoundsAccessChain;
+}
+
+bool LocalSingleBlockLoadStoreElimPass::IsMathType(
+    const ir::Instruction* typeInst) const {
+  switch (typeInst->opcode()) {
+    case SpvOpTypeInt:
+    case SpvOpTypeFloat:
+    case SpvOpTypeBool:
+    case SpvOpTypeVector:
+    case SpvOpTypeMatrix:
+      return true;
+    default:
+      break;
+  }
+  return false;
+}
+
+bool LocalSingleBlockLoadStoreElimPass::IsTargetType(
+    const ir::Instruction* typeInst) const {
+  if (IsMathType(typeInst))
+    return true;
+  if (typeInst->opcode() == SpvOpTypeArray)
+    return IsMathType(def_use_mgr_->GetDef(typeInst->GetSingleWordOperand(1)));
+  if (typeInst->opcode() != SpvOpTypeStruct)
+    return false;
+  // All struct members must be math type
+  int nonMathComp = 0;
+  typeInst->ForEachInId([&nonMathComp,this](const uint32_t* tid) {
+    ir::Instruction* compTypeInst = def_use_mgr_->GetDef(*tid);
+    if (!IsMathType(compTypeInst)) ++nonMathComp;
+  });
+  return nonMathComp == 0;
+}
+
+ir::Instruction* LocalSingleBlockLoadStoreElimPass::GetPtr(
+    ir::Instruction* ip, uint32_t* varId) {
+  *varId = ip->GetSingleWordInOperand(
+      ip->opcode() == SpvOpStore ? kSpvStorePtrId : kSpvLoadPtrId);
+  ir::Instruction* ptrInst = def_use_mgr_->GetDef(*varId);
+  // Walk through any chain of access chains down to the base pointer.
+  ir::Instruction* varInst = ptrInst;
+  while (IsNonPtrAccessChain(varInst->opcode())) {
+    *varId = varInst->GetSingleWordInOperand(kSpvAccessChainPtrId);
+    varInst = def_use_mgr_->GetDef(*varId);
+  }
+  return ptrInst;
+}
+
+bool LocalSingleBlockLoadStoreElimPass::IsTargetVar(uint32_t varId) {
+  if (seen_non_target_vars_.find(varId) != seen_non_target_vars_.end())
+    return false;
+  if (seen_target_vars_.find(varId) != seen_target_vars_.end())
+    return true;
+  const ir::Instruction* varInst = def_use_mgr_->GetDef(varId);
+  assert(varInst->opcode() == SpvOpVariable);
+  const uint32_t varTypeId = varInst->type_id();
+  const ir::Instruction* varTypeInst = def_use_mgr_->GetDef(varTypeId);
+  if (varTypeInst->GetSingleWordInOperand(kSpvTypePointerStorageClass) !=
+      SpvStorageClassFunction) {
+    seen_non_target_vars_.insert(varId);
+    return false;
+  }
+  const uint32_t varPteTypeId =
+      varTypeInst->GetSingleWordInOperand(kSpvTypePointerTypeId);
+  ir::Instruction* varPteTypeInst = def_use_mgr_->GetDef(varPteTypeId);
+  if (!IsTargetType(varPteTypeInst)) {
+    seen_non_target_vars_.insert(varId);
+    return false;
+  }
+  seen_target_vars_.insert(varId);
+  return true;
+}
+
+void LocalSingleBlockLoadStoreElimPass::ReplaceAndDeleteLoad(
+    ir::Instruction* loadInst, uint32_t replId) {
+  const uint32_t loadId = loadInst->result_id();
+  (void) def_use_mgr_->ReplaceAllUsesWith(loadId, replId);
+  // TODO(greg-lunarg): Consider moving DCE into separate pass
+  DCEInst(loadInst);
+}
+
+bool LocalSingleBlockLoadStoreElimPass::HasLoads(uint32_t ptrId) const {
+  analysis::UseList* uses = def_use_mgr_->GetUses(ptrId);
+  if (uses == nullptr)
+    return false;
+  for (auto u : *uses) {
+    SpvOp op = u.inst->opcode();
+    if (IsNonPtrAccessChain(op)) {
+      // A load through an access chain is a load of the base pointer.
+      if (HasLoads(u.inst->result_id()))
+        return true;
+    }
+    else {
+      // Conservatively assume that calls will do a load
+      // TODO(): Improve analysis around function calls
+      if (op == SpvOpLoad || op == SpvOpFunctionCall)
+        return true;
+    }
+  }
+  return false;
+}
+
+bool LocalSingleBlockLoadStoreElimPass::IsLiveVar(uint32_t varId) const {
+  // non-function scope vars are live
+  const ir::Instruction* varInst = def_use_mgr_->GetDef(varId);
+  assert(varInst->opcode() == SpvOpVariable);
+  const uint32_t varTypeId = varInst->type_id();
+  const ir::Instruction* varTypeInst = def_use_mgr_->GetDef(varTypeId);
+  if (varTypeInst->GetSingleWordInOperand(kSpvTypePointerStorageClass) !=
+      SpvStorageClassFunction)
+    return true;
+  // test if variable is loaded from
+  return HasLoads(varId);
+}
+
+bool LocalSingleBlockLoadStoreElimPass::IsLiveStore(
+    ir::Instruction* storeInst) {
+  // get store's variable
+  uint32_t varId;
+  (void) GetPtr(storeInst, &varId);
+  return IsLiveVar(varId);
+}
+
+void LocalSingleBlockLoadStoreElimPass::AddStores(
+    uint32_t ptr_id, std::queue<ir::Instruction*>* insts) {
+  analysis::UseList* uses = def_use_mgr_->GetUses(ptr_id);
+  if (uses != nullptr) {
+    for (auto u : *uses) {
+      if (IsNonPtrAccessChain(u.inst->opcode()))
+        AddStores(u.inst->result_id(), insts);
+      else if (u.inst->opcode() == SpvOpStore)
+        insts->push(u.inst);
+    }
+  }
+}
+
+void LocalSingleBlockLoadStoreElimPass::DCEInst(ir::Instruction* inst) {
+  std::queue<ir::Instruction*> deadInsts;
+  deadInsts.push(inst);
+  while (!deadInsts.empty()) {
+    ir::Instruction* di = deadInsts.front();
+    // Don't delete labels
+    if (di->opcode() == SpvOpLabel) {
+      deadInsts.pop();
+      continue;
+    }
+    // Remember operands
+    std::vector<uint32_t> ids;
+    di->ForEachInId([&ids](uint32_t* iid) {
+      ids.push_back(*iid);
+    });
+    uint32_t varId = 0;
+    // Remember variable if dead load
+    if (di->opcode() == SpvOpLoad)
+      (void) GetPtr(di, &varId);
+    def_use_mgr_->KillInst(di);
+    // For all operands with no remaining uses, add their instruction
+    // to the dead instruction queue.
+    for (auto id : ids) {
+      analysis::UseList* uses = def_use_mgr_->GetUses(id);
+      if (uses != nullptr)
+        continue;
+      // Skip ids without a registered definition; the queue is dereferenced
+      // unconditionally when popped.
+      ir::Instruction* defInst = def_use_mgr_->GetDef(id);
+      if (defInst != nullptr)
+        deadInsts.push(defInst);
+    }
+    // if a load was deleted and it was the variable's
+    // last load, add all its stores to dead queue
+    if (varId != 0 && !IsLiveVar(varId))
+      AddStores(varId, &deadInsts);
+    deadInsts.pop();
+  }
+}
+
+bool LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElim(
+    ir::Function* func) {
+  // Verify no CopyObject ops in function. This is a pre-SSA pass and
+  // is generally not useful for code already in CSSA form.
+  for (auto& blk : *func)
+    for (auto& inst : blk)
+      if (inst.opcode() == SpvOpCopyObject)
+        return false;
+  // Perform local store/load and load/load elimination on each block
+  bool modified = false;
+  for (auto bi = func->begin(); bi != func->end(); ++bi) {
+    var2store_.clear();
+    var2load_.clear();
+    pinned_vars_.clear();
+    for (auto ii = bi->begin(); ii != bi->end(); ++ii) {
+      switch (ii->opcode()) {
+      case SpvOpStore: {
+        // Verify store variable is target type
+        uint32_t varId;
+        ir::Instruction* ptrInst = GetPtr(&*ii, &varId);
+        if (!IsTargetVar(varId))
+          continue;
+        // Register the store
+        if (ptrInst->opcode() == SpvOpVariable) {
+          // if not pinned, look for WAW
+          if (pinned_vars_.find(varId) == pinned_vars_.end()) {
+            auto si = var2store_.find(varId);
+            if (si != var2store_.end()) {
+              def_use_mgr_->KillInst(si->second);
+              // Killing the overwritten store changes the module.
+              modified = true;
+            }
+          }
+          var2store_[varId] = &*ii;
+        }
+        else {
+          // A partial store invalidates the tracked whole-variable store.
+          assert(IsNonPtrAccessChain(ptrInst->opcode()));
+          var2store_.erase(varId);
+        }
+        pinned_vars_.erase(varId);
+        var2load_.erase(varId);
+      } break;
+      case SpvOpLoad: {
+        // Verify load variable is target type
+        uint32_t varId;
+        ir::Instruction* ptrInst = GetPtr(&*ii, &varId);
+        if (!IsTargetVar(varId))
+          continue;
+        // Look for previous store or load
+        uint32_t replId = 0;
+        if (ptrInst->opcode() == SpvOpVariable) {
+          auto si = var2store_.find(varId);
+          if (si != var2store_.end()) {
+            replId = si->second->GetSingleWordInOperand(kSpvStoreValId);
+          }
+          else {
+            auto li = var2load_.find(varId);
+            if (li != var2load_.end()) {
+              replId = li->second->result_id();
+            }
+          }
+        }
+        if (replId != 0) {
+          // replace load's result id and delete load
+          ReplaceAndDeleteLoad(&*ii, replId);
+          modified = true;
+        }
+        else {
+          if (ptrInst->opcode() == SpvOpVariable)
+            var2load_[varId] = &*ii;  // register load
+          // The surviving load depends on the most recent store.
+          pinned_vars_.insert(varId);
+        }
+      } break;
+      case SpvOpFunctionCall: {
+        // Conservatively assume all locals are redefined for now.
+        // TODO(): Handle more optimally
+        var2store_.clear();
+        var2load_.clear();
+        pinned_vars_.clear();
+      } break;
+      default:
+        break;
+      }
+    }
+    // Go back and delete useless stores in block
+    // TODO(greg-lunarg): Consider moving DCE into separate pass
+    for (auto ii = bi->begin(); ii != bi->end(); ++ii) {
+      if (ii->opcode() != SpvOpStore)
+        continue;
+      if (IsLiveStore(&*ii))
+        continue;
+      DCEInst(&*ii);
+      // Removing a dead store changes the module even if no load was replaced.
+      modified = true;
+    }
+  }
+  return modified;
+}
+
+void LocalSingleBlockLoadStoreElimPass::Initialize(ir::Module* module) {
+
+  module_ = module;
+
+  // Initialize function and block maps
+  id2function_.clear();
+  for (auto& fn : *module_)
+    id2function_[fn.result_id()] = &fn;
+
+  // Initialize Target Type Caches
+  seen_target_vars_.clear();
+  seen_non_target_vars_.clear();
+
+  // TODO(): Reuse def/use from previous passes
+  def_use_mgr_.reset(new analysis::DefUseManager(consumer(), module_));
+
+  // Start new ids with next available in module
+  next_id_ = module_->id_bound();
+
+}
+
+Pass::Status LocalSingleBlockLoadStoreElimPass::ProcessImpl() {
+  // Assumes logical addressing only
+  if (module_->HasCapability(SpvCapabilityAddresses))
+    return Status::SuccessWithoutChange;
+  bool modified = false;
+  // Process every entry point function.
+  for (auto& e : module_->entry_points()) {
+    ir::Function* fn =
+        id2function_[e.GetSingleWordOperand(kSpvEntryPointFunctionId)];
+    // Run the pass before testing |modified|: `modified || f()` would
+    // short-circuit and skip every entry point after the first change.
+    modified = LocalSingleBlockLoadStoreElim(fn) || modified;
+  }
+  FinalizeNextId(module_);
+  return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
+}
+
+LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElimPass()
+    : module_(nullptr), def_use_mgr_(nullptr), next_id_(0) {}
+
+Pass::Status LocalSingleBlockLoadStoreElimPass::Process(ir::Module* module) {
+  Initialize(module);
+  return ProcessImpl();
+}
+
+}  // namespace opt
+}  // namespace spvtools
+
diff --git a/source/opt/local_single_block_elim_pass.h b/source/opt/local_single_block_elim_pass.h
new file mode 100644
index 0000000..b5a14f4
--- /dev/null
+++ b/source/opt/local_single_block_elim_pass.h
@@ -0,0 +1,153 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017 Valve Corporation
+// Copyright (c) 2017 LunarG Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_OPT_LOCAL_SINGLE_BLOCK_ELIM_PASS_H_
+#define LIBSPIRV_OPT_LOCAL_SINGLE_BLOCK_ELIM_PASS_H_
+
+
+#include <algorithm>
+#include <map>
+#include <queue>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+#include "basic_block.h"
+#include "def_use_manager.h"
+#include "module.h"
+#include "pass.h"
+
+namespace spvtools {
+namespace opt {
+
+// See optimizer.hpp for documentation.
+class LocalSingleBlockLoadStoreElimPass : public Pass {
+ public:
+  LocalSingleBlockLoadStoreElimPass();
+  const char* name() const override { return "eliminate-local-single-block"; }
+  Status Process(ir::Module*) override;
+
+ private:
+  // Returns true if |opcode| is a non-ptr access chain op
+  // (OpAccessChain or OpInBoundsAccessChain).
+  bool IsNonPtrAccessChain(const SpvOp opcode) const;
+
+  // Returns true if |typeInst| is a scalar type
+  // or a vector or matrix
+  bool IsMathType(const ir::Instruction* typeInst) const;
+
+  // Returns true if |typeInst| is a math type or a struct or array
+  // of a math type.
+  bool IsTargetType(const ir::Instruction* typeInst) const;
+
+  // Given a load or store |ip|, return the pointer instruction.
+  // Also return the base variable's id in |varId|.
+  ir::Instruction* GetPtr(ir::Instruction* ip, uint32_t* varId);
+
+  // Return true if |varId| is a previously identified target variable.
+  // Return false if |varId| is a previously identified non-target variable.
+  // If variable is not cached, return true if variable is a function scope
+  // variable of target type, false otherwise. Updates caches of target
+  // and non-target variables.
+  bool IsTargetVar(uint32_t varId);
+
+  // Replace all instances of |loadInst|'s id with |replId| and delete
+  // |loadInst|.
+  void ReplaceAndDeleteLoad(ir::Instruction* loadInst, uint32_t replId);
+
+  // Return true if any instruction loads from |ptrId|, directly or through
+  // an access chain. A function call using |ptrId| counts as a load.
+  bool HasLoads(uint32_t ptrId) const;
+
+  // Return true if |varId| is not a function variable or if it has
+  // a load
+  bool IsLiveVar(uint32_t varId) const;
+
+  // Return true if |storeInst| is not to function variable or if its
+  // base variable has a load
+  bool IsLiveStore(ir::Instruction* storeInst);
+
+  // Add stores using |ptr_id|, directly or through an access chain,
+  // to |insts|
+  void AddStores(uint32_t ptr_id, std::queue<ir::Instruction*>* insts);
+
+  // Delete |inst| and iterate DCE on all its operands. Won't delete
+  // labels.
+  void DCEInst(ir::Instruction* inst);
+
+  // Within each basic block of |func|, eliminate loads and stores to
+  // function variables where possible. For loads, if previous load or
+  // store to same variable, replace load id with previous id and delete
+  // load. Finally, check if remaining stores are useless, and delete
+  // store and variable where possible. Assumes logical addressing.
+  bool LocalSingleBlockLoadStoreElim(ir::Function* func);
+
+  // Save next available id into |module|.
+  inline void FinalizeNextId(ir::Module* module) {
+    module->SetIdBound(next_id_);
+  }
+
+  // Return next available id and calculate next.
+  inline uint32_t TakeNextId() {
+    return next_id_++;
+  }
+
+  void Initialize(ir::Module* module);
+  Pass::Status ProcessImpl();
+
+  // Module this pass is processing
+  ir::Module* module_;
+
+  // Def-Uses for the module we are processing
+  std::unique_ptr<analysis::DefUseManager> def_use_mgr_;
+
+  // Map from function's result id to function
+  std::unordered_map<uint32_t, ir::Function*> id2function_;
+
+  // Cache of ids of variables previously identified as targets
+  std::unordered_set<uint32_t> seen_target_vars_;
+
+  // Cache of ids of variables previously identified as non-targets
+  std::unordered_set<uint32_t> seen_non_target_vars_;
+
+  // Map from function scope variable to a store of that variable in the
+  // current block whose value is currently valid. This map is cleared
+  // at the start of each block and incrementally updated as the block
+  // is scanned. The stores are candidates for elimination. The map is
+  // conservatively cleared when a function call is encountered.
+  std::unordered_map<uint32_t, ir::Instruction*> var2store_;
+
+  // Map from function scope variable to a load of that variable in the
+  // current block whose value is currently valid. This map is cleared
+  // at the start of each block and incrementally updated as the block
+  // is scanned. A later load of the variable is replaced with the result
+  // of the load recorded here. The map is conservatively cleared when a
+  // function call is encountered.
+  std::unordered_map<uint32_t, ir::Instruction*> var2load_;
+
+  // Set of variables whose most recent store in the current block cannot be
+  // deleted, for example, if there is a load of the variable which is
+  // dependent on the store and is not replaced and deleted by this pass,
+  // for example, a load through an access chain. A variable is removed
+  // from this set each time a new store of that variable is encountered.
+  std::unordered_set<uint32_t> pinned_vars_;
+
+  // Next unused ID
+  uint32_t next_id_;
+};
+
+}  // namespace opt
+}  // namespace spvtools
+
+#endif  // LIBSPIRV_OPT_LOCAL_SINGLE_BLOCK_ELIM_PASS_H_
+
diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp
index 9fde8d3..cf9a825 100644
--- a/source/opt/optimizer.cpp
+++ b/source/opt/optimizer.cpp
@@ -135,11 +135,16 @@ Optimizer::PassToken CreateEliminateDeadConstantPass() {
 Optimizer::PassToken CreateInlinePass() {
   return MakeUnique<Optimizer::PassToken::Impl>(MakeUnique<opt::InlinePass>());
 }
-
+  
 Optimizer::PassToken CreateLocalAccessChainConvertPass() {
   return MakeUnique<Optimizer::PassToken::Impl>(
       MakeUnique<opt::LocalAccessChainConvertPass>());
 }
+  
+Optimizer::PassToken CreateLocalSingleBlockLoadStoreElimPass() {
+  return MakeUnique<Optimizer::PassToken::Impl>(
+      MakeUnique<opt::LocalSingleBlockLoadStoreElimPass>());
+}
 
 Optimizer::PassToken CreateCompactIdsPass() {
   return MakeUnique<Optimizer::PassToken::Impl>(
diff --git a/source/opt/passes.h b/source/opt/passes.h
index 3d19753..61361a7 100644
--- a/source/opt/passes.h
+++ b/source/opt/passes.h
@@ -22,6 +22,7 @@
 #include "flatten_decoration_pass.h"
 #include "fold_spec_constant_op_and_composite_pass.h"
 #include "inline_pass.h"
+#include "local_single_block_elim_pass.h"
 #include "freeze_spec_constant_value_pass.h"
 #include "local_access_chain_convert_pass.h"
 #include "null_pass.h"
diff --git a/test/opt/CMakeLists.txt b/test/opt/CMakeLists.txt
index 97eadb8..fcaefe2 100644
--- a/test/opt/CMakeLists.txt
+++ b/test/opt/CMakeLists.txt
@@ -58,6 +58,11 @@ add_spvtools_unittest(TARGET pass_inline
   LIBS SPIRV-Tools-opt
 )
 
+add_spvtools_unittest(TARGET pass_local_single_block_elim
+  SRCS local_single_block_elim.cpp pass_utils.cpp
+  LIBS SPIRV-Tools-opt
+)
+
add_spvtools_unittest(TARGET pass_local_access_chain_convert SRCS local_access_chain_convert_test.cpp pass_utils.cpp LIBS SPIRV-Tools-opt diff --git a/test/opt/local_single_block_elim.cpp b/test/opt/local_single_block_elim.cpp new file mode 100644 index 0000000..8c193bd --- /dev/null +++ b/test/opt/local_single_block_elim.cpp @@ -0,0 +1,469 @@ +// Copyright (c) 2017 Valve Corporation +// Copyright (c) 2017 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "pass_fixture.h" +#include "pass_utils.h" + +template std::vector concat(const std::vector &a, const std::vector &b) { + std::vector ret = std::vector(); + std::copy(a.begin(), a.end(), back_inserter(ret)); + std::copy(b.begin(), b.end(), back_inserter(ret)); + return ret; +} + +namespace { + +using namespace spvtools; + +using LocalSingleBlockLoadStoreElimTest = PassTest<::testing::Test>; + +TEST_F(LocalSingleBlockLoadStoreElimTest, SimpleStoreLoadElim) { + // #version 140 + // + // in vec4 BaseColor; + // + // void main() + // { + // vec4 v = BaseColor; + // gl_FragColor = v; + // } + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %v "v" +OpName %BaseColor "BaseColor" +OpName %gl_FragColor "gl_FragColor" +%void = OpTypeVoid +%7 = OpTypeFunction %void +%float = 
OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%gl_FragColor = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string before = + R"(%main = OpFunction %void None %7 +%13 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%14 = OpLoad %v4float %BaseColor +OpStore %v %14 +%15 = OpLoad %v4float %v +OpStore %gl_FragColor %15 +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%main = OpFunction %void None %7 +%13 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%14 = OpLoad %v4float %BaseColor +OpStore %gl_FragColor %14 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + predefs + before, predefs + after, true, true); +} + +TEST_F(LocalSingleBlockLoadStoreElimTest, SimpleLoadLoadElim) { + // #version 140 + // + // in vec4 BaseColor; + // in float fi; + // + // void main() + // { + // vec4 v = BaseColor; + // if (fi < 0) + // v = vec4(0.0); + // gl_FragData[0] = v; + // gl_FragData[1] = v; + // } + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BaseColor %fi %gl_FragData +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %v "v" +OpName %BaseColor "BaseColor" +OpName %fi "fi" +OpName %gl_FragData "gl_FragData" +%void = OpTypeVoid +%8 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +%_ptr_Input_float = OpTypePointer Input %float +%fi = OpVariable %_ptr_Input_float Input +%float_0 = OpConstant %float 0 +%bool = OpTypeBool +%16 = OpConstantComposite %v4float %float_0 
%float_0 %float_0 %float_0 +%uint = OpTypeInt 32 0 +%uint_32 = OpConstant %uint 32 +%_arr_v4float_uint_32 = OpTypeArray %v4float %uint_32 +%_ptr_Output__arr_v4float_uint_32 = OpTypePointer Output %_arr_v4float_uint_32 +%gl_FragData = OpVariable %_ptr_Output__arr_v4float_uint_32 Output +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%int_1 = OpConstant %int 1 +)"; + + const std::string before = + R"(%main = OpFunction %void None %8 +%25 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%26 = OpLoad %v4float %BaseColor +OpStore %v %26 +%27 = OpLoad %float %fi +%28 = OpFOrdLessThan %bool %27 %float_0 +OpSelectionMerge %29 None +OpBranchConditional %28 %30 %29 +%30 = OpLabel +OpStore %v %16 +OpBranch %29 +%29 = OpLabel +%31 = OpLoad %v4float %v +%32 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_0 +OpStore %32 %31 +%33 = OpLoad %v4float %v +%34 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_1 +OpStore %34 %33 +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%main = OpFunction %void None %8 +%25 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%26 = OpLoad %v4float %BaseColor +OpStore %v %26 +%27 = OpLoad %float %fi +%28 = OpFOrdLessThan %bool %27 %float_0 +OpSelectionMerge %29 None +OpBranchConditional %28 %30 %29 +%30 = OpLabel +OpStore %v %16 +OpBranch %29 +%29 = OpLabel +%31 = OpLoad %v4float %v +%32 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_0 +OpStore %32 %31 +%34 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_1 +OpStore %34 %31 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + predefs + before, predefs + after, true, true); +} + +TEST_F(LocalSingleBlockLoadStoreElimTest, + NoStoreElimIfInterveningAccessChainLoad) { + // + // Note that even though the Load to %v is eliminated, the Store to %v + // is not eliminated due to the following access chain reference. 
+ // + // #version 140 + // + // in vec4 BaseColor; + // flat in int Idx; + // + // void main() + // { + // vec4 v = BaseColor; + // float f = v[Idx]; + // gl_FragColor = v/f; + // } + + const std::string predefs = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BaseColor %Idx %gl_FragColor +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %v "v" +OpName %BaseColor "BaseColor" +OpName %f "f" +OpName %Idx "Idx" +OpName %gl_FragColor "gl_FragColor" +OpDecorate %Idx Flat +%void = OpTypeVoid +%9 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +%_ptr_Function_float = OpTypePointer Function %float +%int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%Idx = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%gl_FragColor = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string before = + R"(%main = OpFunction %void None %9 +%18 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%f = OpVariable %_ptr_Function_float Function +%19 = OpLoad %v4float %BaseColor +OpStore %v %19 +%20 = OpLoad %int %Idx +%21 = OpAccessChain %_ptr_Function_float %v %20 +%22 = OpLoad %float %21 +OpStore %f %22 +%23 = OpLoad %v4float %v +%24 = OpLoad %float %f +%25 = OpCompositeConstruct %v4float %24 %24 %24 %24 +%26 = OpFDiv %v4float %23 %25 +OpStore %gl_FragColor %26 +OpReturn +OpFunctionEnd +)"; + + const std::string after = + R"(%main = OpFunction %void None %9 +%18 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%f = OpVariable %_ptr_Function_float Function +%19 = OpLoad %v4float %BaseColor +OpStore %v %19 +%20 = OpLoad %int %Idx +%21 = OpAccessChain %_ptr_Function_float %v %20 +%22 = OpLoad %float %21 
+%25 = OpCompositeConstruct %v4float %22 %22 %22 %22 +%26 = OpFDiv %v4float %19 %25 +OpStore %gl_FragColor %26 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + predefs + before, predefs + after, true, true); +} + +TEST_F(LocalSingleBlockLoadStoreElimTest, NoElimIfInterveningAccessChainStore) { + // #version 140 + // + // in vec4 BaseColor; + // flat in int Idx; + // + // void main() + // { + // vec4 v = BaseColor; + // v[Idx] = 0; + // gl_FragColor = v; + // } + + const std::string assembly = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BaseColor %Idx %gl_FragColor +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %v "v" +OpName %BaseColor "BaseColor" +OpName %Idx "Idx" +OpName %gl_FragColor "gl_FragColor" +OpDecorate %Idx Flat +%void = OpTypeVoid +%8 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +%int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%Idx = OpVariable %_ptr_Input_int Input +%float_0 = OpConstant %float 0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%gl_FragColor = OpVariable %_ptr_Output_v4float Output +%main = OpFunction %void None %8 +%18 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%19 = OpLoad %v4float %BaseColor +OpStore %v %19 +%20 = OpLoad %int %Idx +%21 = OpAccessChain %_ptr_Function_float %v %20 +OpStore %21 %float_0 +%22 = OpLoad %v4float %v +OpStore %gl_FragColor %22 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + assembly, assembly, false, true); +} + +TEST_F(LocalSingleBlockLoadStoreElimTest, NoElimIfInterveningFunctionCall) { + // #version 140 + // + // in vec4 BaseColor; + // + // void foo() { + // } + // + // 
void main() + // { + // vec4 v = BaseColor; + // foo(); + // gl_FragColor = v; + // } + + const std::string assembly = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %foo_ "foo(" +OpName %v "v" +OpName %BaseColor "BaseColor" +OpName %gl_FragColor "gl_FragColor" +%void = OpTypeVoid +%8 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%gl_FragColor = OpVariable %_ptr_Output_v4float Output +%main = OpFunction %void None %8 +%14 = OpLabel +%v = OpVariable %_ptr_Function_v4float Function +%15 = OpLoad %v4float %BaseColor +OpStore %v %15 +%16 = OpFunctionCall %void %foo_ +%17 = OpLoad %v4float %v +OpStore %gl_FragColor %17 +OpReturn +OpFunctionEnd +%foo_ = OpFunction %void None %8 +%18 = OpLabel +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + assembly, assembly, false, true); +} + +TEST_F(LocalSingleBlockLoadStoreElimTest, NoElimIfCopyObjectInFunction) { + // Note: SPIR-V hand edited to insert CopyObject + // + // #version 140 + // + // in vec4 BaseColor; + // + // void main() + // { + // vec4 v1 = BaseColor; + // gl_FragData[0] = v1; + // vec4 v2 = BaseColor * 0.5; + // gl_FragData[1] = v2; + // } + + const std::string assembly = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %BaseColor %gl_FragData +OpExecutionMode %main OriginUpperLeft +OpSource GLSL 140 +OpName %main "main" +OpName %v1 "v1" +OpName %BaseColor "BaseColor" +OpName %gl_FragData "gl_FragData" +OpName %v2 "v2" +%void = OpTypeVoid +%8 = OpTypeFunction %void +%float = OpTypeFloat 
32 +%v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%BaseColor = OpVariable %_ptr_Input_v4float Input +%uint = OpTypeInt 32 0 +%uint_32 = OpConstant %uint 32 +%_arr_v4float_uint_32 = OpTypeArray %v4float %uint_32 +%_ptr_Output__arr_v4float_uint_32 = OpTypePointer Output %_arr_v4float_uint_32 +%gl_FragData = OpVariable %_ptr_Output__arr_v4float_uint_32 Output +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%float_0_5 = OpConstant %float 0.5 +%int_1 = OpConstant %int 1 +%main = OpFunction %void None %8 +%22 = OpLabel +%v1 = OpVariable %_ptr_Function_v4float Function +%v2 = OpVariable %_ptr_Function_v4float Function +%23 = OpLoad %v4float %BaseColor +OpStore %v1 %23 +%24 = OpLoad %v4float %v1 +%25 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_0 +OpStore %25 %24 +%26 = OpLoad %v4float %BaseColor +%27 = OpVectorTimesScalar %v4float %26 %float_0_5 +%28 = OpCopyObject %_ptr_Function_v4float %v2 +OpStore %28 %27 +%29 = OpLoad %v4float %28 +%30 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_1 +OpStore %30 %29 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck( + assembly, assembly, false, true); +} + +// TODO(greg-lunarg): Add tests to verify handling of these cases: +// +// Other target variable types +// InBounds Access Chains +// Check for correctness in the presence of function calls +// Others? 
+ +} // anonymous namespace diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 234cdc5..21dd676 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -135,6 +135,8 @@ int main(int argc, char** argv) { optimizer.RegisterPass(CreateInlinePass()); } else if (0 == strcmp(cur_arg, "--convert-local-access-chains")) { optimizer.RegisterPass(CreateLocalAccessChainConvertPass()); + } else if (0 == strcmp(cur_arg, "--eliminate-local-single-block")) { + optimizer.RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()); } else if (0 == strcmp(cur_arg, "--eliminate-dead-const")) { optimizer.RegisterPass(CreateEliminateDeadConstantPass()); } else if (0 == strcmp(cur_arg, "--fold-spec-const-op-composite")) { -- 2.7.4