DeadInsertElim: Detect and DCE dead Inserts

author Greg Fischer <greg@lunarg.com>

Thu, 11 Jan 2018 23:23:58 +0000 (16:23 -0700)

committer Steven Perron <stevenperron@google.com>

Thu, 25 Jan 2018 21:07:21 +0000 (16:07 -0500)
author Greg Fischer <greg@lunarg.com>
Thu, 11 Jan 2018 23:23:58 +0000 (16:23 -0700)
committer Steven Perron <stevenperron@google.com>
Thu, 25 Jan 2018 21:07:21 +0000 (16:07 -0500)
diff --git a/source/opt/insert_extract_elim.cpp b/source/opt/insert_extract_elim.cpp

index 3cfee4d..4940fe6 100644 (file)
--- a/source/opt/insert_extract_elim.cpp
+++ b/source/opt/insert_extract_elim.cpp
@@ -30,6 +30,10 @@ namespace {
  const uint32_t kExtractCompositeIdInIdx = 0;
  const uint32_t kInsertObjectIdInIdx = 0;
  const uint32_t kInsertCompositeIdInIdx = 1;
+const uint32_t kTypeVectorCountInIdx = 1;
+const uint32_t kTypeMatrixCountInIdx = 1;
+const uint32_t kTypeArrayLengthIdInIdx = 1;
+const uint32_t kTypeIntWidthInIdx = 0;
  const uint32_t kConstantValueInIdx = 0;
  const uint32_t kVectorShuffleVec1IdInIdx = 0;
  const uint32_t kVectorShuffleVec2IdInIdx = 1;
@@ -75,6 +79,216 @@ bool InsertExtractElimPass::IsVectorType(uint32_t typeId) {
    return typeInst->opcode() == SpvOpTypeVector;
  }
  
+// Returns whether the type instruction that defines |typeId| has an opcode
+// that spvOpcodeIsComposite classifies as composite.
+bool InsertExtractElimPass::IsComposite(uint32_t typeId) {
+  const SpvOp typeOp = get_def_use_mgr()->GetDef(typeId)->opcode();
+  return spvOpcodeIsComposite(typeOp);
+}
+
+uint32_t InsertExtractElimPass::NumComponents(uint32_t typeId) {
+  ir::Instruction* typeInst = get_def_use_mgr()->GetDef(typeId);
+  switch (typeInst->opcode()) {
+    case SpvOpTypeVector: {
+      return typeInst->GetSingleWordInOperand(kTypeVectorCountInIdx);
+    } break;
+    case SpvOpTypeMatrix: {
+      return typeInst->GetSingleWordInOperand(kTypeMatrixCountInIdx);
+    } break;
+    case SpvOpTypeArray: {
+      uint32_t lenId =
+          typeInst->GetSingleWordInOperand(kTypeArrayLengthIdInIdx);
+      ir::Instruction* lenInst = get_def_use_mgr()->GetDef(lenId);
+      if (lenInst->opcode() != SpvOpConstant) return 0;
+      uint32_t lenTypeId = lenInst->type_id();
+      ir::Instruction* lenTypeInst = get_def_use_mgr()->GetDef(lenTypeId);
+      // TODO(greg-lunarg): Support non-32-bit array length
+      if (lenTypeInst->GetSingleWordInOperand(kTypeIntWidthInIdx) != 32)
+        return 0;
+      return lenInst->GetSingleWordInOperand(kConstantValueInIdx);
+    } break;
+    case SpvOpTypeStruct: {
+      return typeInst->NumInOperands();
+    } break;
+    default: { return 0; } break;
+  }
+}
+
+void InsertExtractElimPass::MarkInsertChain(ir::Instruction* insertChain,
+                                            std::vector<uint32_t>* pExtIndices,
+                                            uint32_t extOffset) {
+  // Not currently optimizing array inserts.
+  ir::Instruction* typeInst = get_def_use_mgr()->GetDef(insertChain->type_id());
+  if (typeInst->opcode() == SpvOpTypeArray) return;
+  // Insert chains are only composed of inserts and phis
+  if (insertChain->opcode() != SpvOpCompositeInsert &&
+      insertChain->opcode() != SpvOpPhi)
+    return;
+  // If extract indices are empty, mark all subcomponents if type
+  // is constant length.
+  if (pExtIndices == nullptr) {
+    uint32_t cnum = NumComponents(insertChain->type_id());
+    if (cnum > 0) {
+      std::vector<uint32_t> extIndices;
+      for (uint32_t i = 0; i < cnum; i++) {
+        extIndices.clear();
+        extIndices.push_back(i);
+        MarkInsertChain(insertChain, &extIndices, 0);
+      }
+      return;
+    }
+  }
+  ir::Instruction* insInst = insertChain;
+  while (insInst->opcode() == SpvOpCompositeInsert) {
+    // If no extract indices, mark insert and inserted object (which might
+    // also be an insert chain) and continue up the chain though the input
+    // composite.
+    //
+    // Note: We mark inserted objects in this function (rather than in
+    // EliminateDeadInsertsOnePass) because in some cases, we can do it
+    // more accurately here.
+    if (pExtIndices == nullptr) {
+      liveInserts_.insert(insInst->result_id());
+      uint32_t objId = insInst->GetSingleWordInOperand(kInsertObjectIdInIdx);
+      MarkInsertChain(get_def_use_mgr()->GetDef(objId), nullptr, 0);
+    }
+    // If extract indices match insert, we are done. Mark insert and
+    // inserted object.
+    else if (ExtInsMatch(*pExtIndices, insInst, extOffset)) {
+      liveInserts_.insert(insInst->result_id());
+      uint32_t objId = insInst->GetSingleWordInOperand(kInsertObjectIdInIdx);
+      MarkInsertChain(get_def_use_mgr()->GetDef(objId), nullptr, 0);
+      break;
+    }
+    // If non-matching intersection, mark insert
+    else if (ExtInsConflict(*pExtIndices, insInst, extOffset)) {
+      liveInserts_.insert(insInst->result_id());
+      // If more extract indices than insert, we are done. Use remaining
+      // extract indices to mark inserted object.
+      uint32_t numInsertIndices = insInst->NumInOperands() - 2;
+      if (pExtIndices->size() - extOffset > numInsertIndices) {
+        uint32_t objId = insInst->GetSingleWordInOperand(kInsertObjectIdInIdx);
+        MarkInsertChain(get_def_use_mgr()->GetDef(objId), pExtIndices,
+                        extOffset + numInsertIndices);
+        break;
+      }
+      // If fewer extract indices than insert, also mark inserted object and
+      // continue up chain.
+      else {
+        uint32_t objId = insInst->GetSingleWordInOperand(kInsertObjectIdInIdx);
+        MarkInsertChain(get_def_use_mgr()->GetDef(objId), nullptr, 0);
+      }
+    }
+    // Get next insert in chain
+    const uint32_t compId =
+        insInst->GetSingleWordInOperand(kInsertCompositeIdInIdx);
+    insInst = get_def_use_mgr()->GetDef(compId);
+  }
+  // If insert chain ended with phi, do recursive call on each operand
+  if (insInst->opcode() != SpvOpPhi) return;
+  // Mark phi visited to prevent potential infinite loop. If phi is already
+  // visited, return to avoid infinite loop
+  if (!visitedPhis_.insert(insInst->result_id()).second) return;
+  uint32_t icnt = 0;
+  insInst->ForEachInId([&icnt, &pExtIndices, &extOffset, this](uint32_t* idp) {
+    if (icnt % 2 == 0) {
+      ir::Instruction* pi = get_def_use_mgr()->GetDef(*idp);
+      MarkInsertChain(pi, pExtIndices, extOffset);
+    }
+    ++icnt;
+  });
+  // Unmark phi when done visiting
+  visitedPhis_.erase(insInst->result_id());
+}
+
+// Runs EliminateDeadInsertsOnePass on |func| repeatedly until a pass reports
+// no change. Returns true if any pass modified the function.
+bool InsertExtractElimPass::EliminateDeadInserts(ir::Function* func) {
+  bool changedAny = false;
+  // Deleting dead instructions in one pass can leave further insertions
+  // without uses, so iterate to a fixed point.
+  while (EliminateDeadInsertsOnePass(func)) changedAny = true;
+  return changedAny;
+}
+
+bool InsertExtractElimPass::EliminateDeadInsertsOnePass(ir::Function* func) {
+  bool modified = false;
+  liveInserts_.clear();
+  visitedPhis_.clear();
+  // Mark all live inserts
+  for (auto bi = func->begin(); bi != func->end(); ++bi) {
+    for (auto ii = bi->begin(); ii != bi->end(); ++ii) {
+      // Only process Inserts and composite Phis
+      SpvOp op = ii->opcode();
+      if (op != SpvOpCompositeInsert &&
+          (op != SpvOpPhi || !IsComposite(ii->type_id())))
+        continue;
+      // The marking algorithm can be expensive for large arrays and the
+      // efficacy of eliminating dead inserts into arrays is questionable.
+      // Skip optimizing array inserts for now. Just mark them live.
+      // TODO(greg-lunarg): Eliminate dead array inserts
+      if (op == SpvOpCompositeInsert) {
+        ir::Instruction* typeInst = get_def_use_mgr()->GetDef(ii->type_id());
+        if (typeInst->opcode() == SpvOpTypeArray) {
+          liveInserts_.insert(ii->result_id());
+          continue;
+        }
+      }
+      const uint32_t id = ii->result_id();
+      get_def_use_mgr()->ForEachUser(id, [&ii, this](ir::Instruction* user) {
+        switch (user->opcode()) {
+          case SpvOpCompositeInsert:
+          case SpvOpPhi:
+            // Use by insert or phi does not initiate marking
+            break;
+          case SpvOpCompositeExtract: {
+            // Capture extract indices
+            std::vector<uint32_t> extIndices;
+            uint32_t icnt = 0;
+            user->ForEachInOperand([&icnt, &extIndices](const uint32_t* idp) {
+              if (icnt > 0) extIndices.push_back(*idp);
+              ++icnt;
+            });
+            // Mark all inserts in chain that intersect with extract
+            MarkInsertChain(&*ii, &extIndices, 0);
+          } break;
+          default: {
+            // Mark inserts in chain for all components
+            MarkInsertChain(&*ii, nullptr, 0);
+          } break;
+        }
+      });
+    }
+  }
+  // Find and disconnect dead inserts
+  std::vector<ir::Instruction*> dead_instructions;
+  for (auto bi = func->begin(); bi != func->end(); ++bi) {
+    for (auto ii = bi->begin(); ii != bi->end(); ++ii) {
+      if (ii->opcode() != SpvOpCompositeInsert) continue;
+      const uint32_t id = ii->result_id();
+      if (liveInserts_.find(id) != liveInserts_.end()) continue;
+      const uint32_t replId =
+          ii->GetSingleWordInOperand(kInsertCompositeIdInIdx);
+      (void)context()->ReplaceAllUsesWith(id, replId);
+      dead_instructions.push_back(&*ii);
+      modified = true;
+    }
+  }
+  // DCE dead inserts
+  while (!dead_instructions.empty()) {
+    ir::Instruction* inst = dead_instructions.back();
+    dead_instructions.pop_back();
+    DCEInst(inst, [&dead_instructions](ir::Instruction* other_inst) {
+      auto i = std::find(dead_instructions.begin(), dead_instructions.end(),
+                         other_inst);
+      if (i != dead_instructions.end()) {
+        dead_instructions.erase(i);
+      }
+    });
+  }
+  return modified;
+}
+
  uint32_t InsertExtractElimPass::DoExtract(ir::Instruction* compInst,
                                            std::vector<uint32_t>* pExtIndices,
                                            uint32_t extOffset) {
@@ -223,6 +437,7 @@ bool InsertExtractElimPass::EliminateInsertExtract(ir::Function* func) {
        }
      }
    }
+  modified |= EliminateDeadInserts(func);
    return modified;
  }
  
diff --git a/source/opt/insert_extract_elim.h b/source/opt/insert_extract_elim.h

index a645890..5e585fe 100644 (file)
--- a/source/opt/insert_extract_elim.h
+++ b/source/opt/insert_extract_elim.h
@@ -26,14 +26,14 @@
  #include "basic_block.h"
  #include "def_use_manager.h"
  #include "ir_context.h"
+#include "mem_pass.h"
  #include "module.h"
-#include "pass.h"
  
  namespace spvtools {
  namespace opt {
  
  // See optimizer.hpp for documentation.
-class InsertExtractElimPass : public Pass {
+class InsertExtractElimPass : public MemPass {
   public:
    InsertExtractElimPass();
    const char* name() const override { return "eliminate-insert-extract"; }
@@ -58,6 +58,32 @@ class InsertExtractElimPass : public Pass {
    // Return true if |typeId| is a vector type
    bool IsVectorType(uint32_t typeId);
  
+  // Return true if |typeId| is composite.
+  bool IsComposite(uint32_t typeId);
+
+  // Return the number of subcomponents in the composite type |typeId|.
+  // Return 0 if not a composite type or number of components is not a
+  // 32-bit constant.
+  uint32_t NumComponents(uint32_t typeId);
+
+  // Mark all inserts in instruction chain ending at |insertChain| with
+  // indices that intersect with extract indices |extIndices| starting with
+  // index at |extOffset|. Chains are composed solely of Inserts and Phis.
+  // Mark all inserts in chain if |extIndices| is nullptr.
+  void MarkInsertChain(ir::Instruction* insertChain,
+                       std::vector<uint32_t>* extIndices, uint32_t extOffset);
+
+  // Perform EliminateDeadInsertsOnePass(|func|) until no modification is
+  // made. Return true if modified.
+  bool EliminateDeadInserts(ir::Function* func);
+
+  // DCE all dead struct, matrix and vector inserts in |func|. An insert is
+  // dead if the value it inserts is never used. Replace any reference to the
+  // insert with its original composite. Return true if modified. Dead inserts
+  // in dependence cycles are not currently eliminated. Dead inserts into
+  // arrays are not currently eliminated.
+  bool EliminateDeadInsertsOnePass(ir::Function* func);
+
    // Return id of component of |cinst| specified by |extIndices| starting with
    // index at |extOffset|. Return 0 if indices cannot be matched exactly.
    uint32_t DoExtract(ir::Instruction* cinst, std::vector<uint32_t>* extIndices,
@@ -78,6 +104,12 @@ class InsertExtractElimPass : public Pass {
    void Initialize(ir::IRContext* c);
    Pass::Status ProcessImpl();
  
+  // Live inserts
+  std::unordered_set<uint32_t> liveInserts_;
+
+  // Visited phis as insert chain is traversed; used to avoid infinite loop
+  std::unordered_set<uint32_t> visitedPhis_;
+
    // Extensions supported by this pass.
    std::unordered_set<std::string> extensions_whitelist_;
  };
diff --git a/test/opt/insert_extract_elim_test.cpp b/test/opt/insert_extract_elim_test.cpp

index 68a7c3c..b3b9440 100644 (file)
--- a/test/opt/insert_extract_elim_test.cpp
+++ b/test/opt/insert_extract_elim_test.cpp
@@ -484,12 +484,12 @@ TEST_F(InsertExtractElimTest, ConflictingInsertPreventsOptimization2) {
    // void main()
    // {
    //     S_t s0;
-  //     s0.v1[1] = 1.0;
+  //     s0.v1[1] = 1.0; // dead
    //     s0.v1 = Baseline;
    //     gl_FragColor = vec4(s0.v1[1], 0.0, 0.0, 0.0);
    // }
  
-  const std::string assembly =
+  const std::string before_predefs =
        R"(OpCapability Shader
  %1 = OpExtInstImport "GLSL.std.450"
  OpMemoryModel Logical GLSL450
@@ -521,7 +521,43 @@ OpName %gl_FragColor "gl_FragColor"
  %_ptr_Output_v4float = OpTypePointer Output %v4float
  %gl_FragColor = OpVariable %_ptr_Output_v4float Output
  %float_0 = OpConstant %float 0
-%main = OpFunction %void None %8
+)";
+
+  const std::string after_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %BaseColor %gl_FragColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 140
+OpName %main "main"
+OpName %S_t "S_t"
+OpMemberName %S_t 0 "v0"
+OpMemberName %S_t 1 "v1"
+OpName %s0 "s0"
+OpName %BaseColor "BaseColor"
+OpName %gl_FragColor "gl_FragColor"
+%void = OpTypeVoid
+%8 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%S_t = OpTypeStruct %v4float %v4float
+%_ptr_Function_S_t = OpTypePointer Function %S_t
+%int = OpTypeInt 32 1
+%int_1 = OpConstant %int 1
+%uint = OpTypeInt 32 0
+%uint_1 = OpConstant %uint 1
+%_ptr_Function_float = OpTypePointer Function %float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%BaseColor = OpVariable %_ptr_Input_v4float Input
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%gl_FragColor = OpVariable %_ptr_Output_v4float Output
+%float_0 = OpConstant %float 0
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %8
  %22 = OpLabel
  %s0 = OpVariable %_ptr_Function_S_t Function
  %23 = OpLoad %S_t %s0
@@ -535,8 +571,22 @@ OpReturn
  OpFunctionEnd
  )";
  
-  SinglePassRunAndCheck<opt::InsertExtractElimPass>(assembly, assembly, true,
-                                                    true);
+  const std::string after =
+      R"(%main = OpFunction %void None %8
+%22 = OpLabel
+%s0 = OpVariable %_ptr_Function_S_t Function
+%23 = OpLoad %S_t %s0
+%25 = OpLoad %v4float %BaseColor
+%26 = OpCompositeInsert %S_t %25 %23 1
+%27 = OpCompositeExtract %float %26 1 1
+%28 = OpCompositeConstruct %v4float %27 %float_0 %float_0 %float_0
+OpStore %gl_FragColor %28
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<opt::InsertExtractElimPass>(
+      before_predefs + before, after_predefs + after, true, true);
  }
  
  TEST_F(InsertExtractElimTest, MixWithConstants) {
@@ -805,6 +855,663 @@ OpFunctionEnd
        predefs + before, predefs + after, true, true);
  }
  
+TEST_F(InsertExtractElimTest, InsertAfterInsertElim) {
+  // With two insertions to the same offset, the first is dead.
+  //
+  // Note: The SPIR-V assembly has had store/load elimination
+  // performed to allow the inserts and extracts to directly
+  // reference each other.
+  //
+  // #version 450
+  //
+  // layout (location=0) in float In0;
+  // layout (location=1) in float In1;
+  // layout (location=2) in vec2 In2;
+  // layout (location=0) out vec4 OutColor;
+  //
+  // void main()
+  // {
+  //     vec2 v = In2;
+  //     v.x = In0 + In1; // dead
+  //     v.x = 0.0;
+  //     OutColor = v.xyxy;
+  // }
+
+  const std::string before_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In2 %In0 %In1 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In2 "In2"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %OutColor "OutColor"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpMemberName %_Globals_ 1 "g_n"
+OpName %_ ""
+OpDecorate %In2 Location 2
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %OutColor Location 0
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpMemberDecorate %_Globals_ 1 Offset 4
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+%void = OpTypeVoid
+%11 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+%In2 = OpVariable %_ptr_Input_v2float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In0 = OpVariable %_ptr_Input_float Input
+%In1 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_ptr_Function_float = OpTypePointer Function %float
+%float_0 = OpConstant %float 0
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%int = OpTypeInt 32 1
+%_Globals_ = OpTypeStruct %uint %int
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+)";
+
+  const std::string after_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In2 %In0 %In1 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In2 "In2"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %OutColor "OutColor"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpMemberName %_Globals_ 1 "g_n"
+OpName %_ ""
+OpDecorate %In2 Location 2
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %OutColor Location 0
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpMemberDecorate %_Globals_ 1 Offset 4
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+%In2 = OpVariable %_ptr_Input_v2float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In0 = OpVariable %_ptr_Input_float Input
+%In1 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_ptr_Function_float = OpTypePointer Function %float
+%float_0 = OpConstant %float 0
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%int = OpTypeInt 32 1
+%_Globals_ = OpTypeStruct %uint %int
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %11
+%25 = OpLabel
+%26 = OpLoad %v2float %In2
+%27 = OpLoad %float %In0
+%28 = OpLoad %float %In1
+%29 = OpFAdd %float %27 %28
+%35 = OpCompositeInsert %v2float %29 %26 0
+%37 = OpCompositeInsert %v2float %float_0 %35 0
+%33 = OpVectorShuffle %v4float %37 %37 0 1 0 1
+OpStore %OutColor %33
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string after =
+      R"(%main = OpFunction %void None %10
+%23 = OpLabel
+%24 = OpLoad %v2float %In2
+%29 = OpCompositeInsert %v2float %float_0 %24 0
+%30 = OpVectorShuffle %v4float %29 %29 0 1 0 1
+OpStore %OutColor %30
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<opt::InsertExtractElimPass>(
+      before_predefs + before, after_predefs + after, true, true);
+}
+
+TEST_F(InsertExtractElimTest, DeadInsertInChainWithPhi) {
+  // Dead insert eliminated with phi in insertion chain.
+  //
+  // Note: The SPIR-V assembly has had store/load elimination
+  // performed to allow the inserts and extracts to directly
+  // reference each other.
+  //
+  // #version 450
+  //
+  // layout (location=0) in vec4 In0;
+  // layout (location=1) in float In1;
+  // layout (location=2) in float In2;
+  // layout (location=0) out vec4 OutColor;
+  //
+  // layout(std140, binding = 0 ) uniform _Globals_
+  // {
+  //     bool g_b;
+  // };
+  //
+  // void main()
+  // {
+  //     vec4 v = In0;
+  //     v.z = In1 + In2; // dead
+  //     if (g_b) v.w = 1.0;
+  //     OutColor = vec4(v.x,v.y,0.0,v.w);
+  // }
+
+  const std::string before_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In0 %In1 %In2 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %In2 "In2"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpName %_ ""
+OpName %OutColor "OutColor"
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %In2 Location 2
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+OpDecorate %OutColor Location 0
+%void = OpTypeVoid
+%11 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%In0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In1 = OpVariable %_ptr_Input_float Input
+%In2 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_ptr_Function_float = OpTypePointer Function %float
+%_Globals_ = OpTypeStruct %uint
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%bool = OpTypeBool
+%uint_0 = OpConstant %uint 0
+%float_1 = OpConstant %float 1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%float_0 = OpConstant %float 0
+)";
+
+  const std::string after_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In0 %In1 %In2 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %In2 "In2"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpName %_ ""
+OpName %OutColor "OutColor"
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %In2 Location 2
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+OpDecorate %OutColor Location 0
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%In0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In1 = OpVariable %_ptr_Input_float Input
+%In2 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_ptr_Function_float = OpTypePointer Function %float
+%_Globals_ = OpTypeStruct %uint
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%bool = OpTypeBool
+%uint_0 = OpConstant %uint 0
+%float_1 = OpConstant %float 1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%float_0 = OpConstant %float 0
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %11
+%31 = OpLabel
+%32 = OpLoad %v4float %In0
+%33 = OpLoad %float %In1
+%34 = OpLoad %float %In2
+%35 = OpFAdd %float %33 %34
+%51 = OpCompositeInsert %v4float %35 %32 2
+%37 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
+%38 = OpLoad %uint %37
+%39 = OpINotEqual %bool %38 %uint_0
+OpSelectionMerge %40 None
+OpBranchConditional %39 %41 %40
+%41 = OpLabel
+%53 = OpCompositeInsert %v4float %float_1 %51 3
+OpBranch %40
+%40 = OpLabel
+%60 = OpPhi %v4float %51 %31 %53 %41
+%55 = OpCompositeExtract %float %60 0
+%57 = OpCompositeExtract %float %60 1
+%59 = OpCompositeExtract %float %60 3
+%49 = OpCompositeConstruct %v4float %55 %57 %float_0 %59
+OpStore %OutColor %49
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string after =
+      R"(%main = OpFunction %void None %10
+%27 = OpLabel
+%28 = OpLoad %v4float %In0
+%33 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
+%34 = OpLoad %uint %33
+%35 = OpINotEqual %bool %34 %uint_0
+OpSelectionMerge %36 None
+OpBranchConditional %35 %37 %36
+%37 = OpLabel
+%38 = OpCompositeInsert %v4float %float_1 %28 3
+OpBranch %36
+%36 = OpLabel
+%39 = OpPhi %v4float %28 %27 %38 %37
+%40 = OpCompositeExtract %float %39 0
+%41 = OpCompositeExtract %float %39 1
+%42 = OpCompositeExtract %float %39 3
+%43 = OpCompositeConstruct %v4float %40 %41 %float_0 %42
+OpStore %OutColor %43
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<opt::InsertExtractElimPass>(
+      before_predefs + before, after_predefs + after, true, true);
+}
+
+TEST_F(InsertExtractElimTest, DeadInsertTwoPasses) {
+  // Dead insert which requires two passes to eliminate
+  //
+  // Note: The SPIR-V assembly has had store/load elimination
+  // performed to allow the inserts and extracts to directly
+  // reference each other.
+  //
+  // #version 450
+  //
+  // layout (location=0) in vec4 In0;
+  // layout (location=1) in float In1;
+  // layout (location=2) in float In2;
+  // layout (location=0) out vec4 OutColor;
+  //
+  // layout(std140, binding = 0 ) uniform _Globals_
+  // {
+  //     bool g_b;
+  //     bool g_b2;
+  // };
+  //
+  // void main()
+  // {
+  //     vec4 v1, v2;
+  //     v1 = In0;
+  //     v1.y = In1 + In2; // dead, second pass
+  //     if (g_b) v1.x = 1.0;
+  //     v2.x = v1.x;
+  //     v2.y = v1.y; // dead, first pass
+  //     if (g_b2) v2.x = 0.0;
+  //     OutColor = vec4(v2.x,v2.x,0.0,1.0);
+  // }
+
+  const std::string before_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In0 %In1 %In2 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %In2 "In2"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpMemberName %_Globals_ 1 "g_b2"
+OpName %_ ""
+OpName %OutColor "OutColor"
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %In2 Location 2
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpMemberDecorate %_Globals_ 1 Offset 4
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+OpDecorate %OutColor Location 0
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%In0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In1 = OpVariable %_ptr_Input_float Input
+%In2 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_Globals_ = OpTypeStruct %uint %uint
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%bool = OpTypeBool
+%uint_0 = OpConstant %uint 0
+%float_1 = OpConstant %float 1
+%int_1 = OpConstant %int 1
+%float_0 = OpConstant %float 0
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%27 = OpUndef %v4float
+)";
+
+  const std::string after_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In0 %In1 %In2 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %In2 "In2"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpMemberName %_Globals_ 1 "g_b2"
+OpName %_ ""
+OpName %OutColor "OutColor"
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %In2 Location 2
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpMemberDecorate %_Globals_ 1 Offset 4
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+OpDecorate %OutColor Location 0
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%In0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In1 = OpVariable %_ptr_Input_float Input
+%In2 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_Globals_ = OpTypeStruct %uint %uint
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%bool = OpTypeBool
+%uint_0 = OpConstant %uint 0
+%float_1 = OpConstant %float 1
+%int_1 = OpConstant %int 1
+%float_0 = OpConstant %float 0
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%27 = OpUndef %v4float
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %10
+%28 = OpLabel
+%29 = OpLoad %v4float %In0
+%30 = OpLoad %float %In1
+%31 = OpLoad %float %In2
+%32 = OpFAdd %float %30 %31
+%33 = OpCompositeInsert %v4float %32 %29 1
+%34 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
+%35 = OpLoad %uint %34
+%36 = OpINotEqual %bool %35 %uint_0
+OpSelectionMerge %37 None
+OpBranchConditional %36 %38 %37
+%38 = OpLabel
+%39 = OpCompositeInsert %v4float %float_1 %33 0
+OpBranch %37
+%37 = OpLabel
+%40 = OpPhi %v4float %33 %28 %39 %38
+%41 = OpCompositeExtract %float %40 0
+%42 = OpCompositeInsert %v4float %41 %27 0
+%43 = OpCompositeExtract %float %40 1
+%44 = OpCompositeInsert %v4float %43 %42 1
+%45 = OpAccessChain %_ptr_Uniform_uint %_ %int_1
+%46 = OpLoad %uint %45
+%47 = OpINotEqual %bool %46 %uint_0
+OpSelectionMerge %48 None
+OpBranchConditional %47 %49 %48
+%49 = OpLabel
+%50 = OpCompositeInsert %v4float %float_0 %44 0
+OpBranch %48
+%48 = OpLabel
+%51 = OpPhi %v4float %44 %37 %50 %49
+%52 = OpCompositeExtract %float %51 0
+%53 = OpCompositeExtract %float %51 0
+%54 = OpCompositeConstruct %v4float %52 %53 %float_0 %float_1
+OpStore %OutColor %54
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string after =
+      R"(%main = OpFunction %void None %10
+%28 = OpLabel
+%29 = OpLoad %v4float %In0
+%34 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
+%35 = OpLoad %uint %34
+%36 = OpINotEqual %bool %35 %uint_0
+OpSelectionMerge %37 None
+OpBranchConditional %36 %38 %37
+%38 = OpLabel
+%39 = OpCompositeInsert %v4float %float_1 %29 0
+OpBranch %37
+%37 = OpLabel
+%40 = OpPhi %v4float %29 %28 %39 %38
+%41 = OpCompositeExtract %float %40 0
+%42 = OpCompositeInsert %v4float %41 %27 0
+%45 = OpAccessChain %_ptr_Uniform_uint %_ %int_1
+%46 = OpLoad %uint %45
+%47 = OpINotEqual %bool %46 %uint_0
+OpSelectionMerge %48 None
+OpBranchConditional %47 %49 %48
+%49 = OpLabel
+%50 = OpCompositeInsert %v4float %float_0 %42 0
+OpBranch %48
+%48 = OpLabel
+%51 = OpPhi %v4float %42 %37 %50 %49
+%52 = OpCompositeExtract %float %51 0
+%53 = OpCompositeExtract %float %51 0
+%54 = OpCompositeConstruct %v4float %52 %53 %float_0 %float_1
+OpStore %OutColor %54
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<opt::InsertExtractElimPass>(
+      before_predefs + before, after_predefs + after, true, true);
+}
+
+TEST_F(InsertExtractElimTest, DeadInsertInCycleToDo) {
+  // Dead insert in a chain with a cycle (loop phi %31 is fed by insert %32).
+  // Demonstrates the analysis handles such cycles; output must equal input.
+  //
+  // TODO(greg-lunarg): Improve algorithm to remove dead insert into v.y. Will
+  // likely require similar logic to ADCE.
+  //
+  // Note: The SPIR-V assembly has had store/load elimination
+  // performed to allow the inserts and extracts to directly
+  // reference each other.
+  //
+  // #version 450
+  //
+  // layout (location=0) in vec4 In0;
+  // layout (location=1) in float In1;
+  // layout (location=2) in float In2;
+  // layout (location=0) out vec4 OutColor;
+  //
+  // layout(std140, binding = 0 ) uniform _Globals_
+  // {
+  //     int g_n  ;
+  // };
+  //
+  // void main()
+  // {
+  //     vec2 v = vec2(0.0, 1.0);
+  //     for (int i = 0; i < g_n; i++) {
+  //       v.x = v.x + 1;
+  //       v.y = v.y * 0.9; // dead
+  //     }
+  //     OutColor = vec4(v.x);
+  // }
+
+  const std::string assembly =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %OutColor %In0 %In1 %In2
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_n"
+OpName %_ ""
+OpName %OutColor "OutColor"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %In2 "In2"
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+OpDecorate %OutColor Location 0
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %In2 Location 2
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%float_0 = OpConstant %float 0
+%float_1 = OpConstant %float 1
+%16 = OpConstantComposite %v2float %float_0 %float_1
+%int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%int_0 = OpConstant %int 0
+%_Globals_ = OpTypeStruct %int
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+%_ptr_Uniform_int = OpTypePointer Uniform %int
+%bool = OpTypeBool
+%float_0_9 = OpConstant %float 0.9
+%int_1 = OpConstant %int 1
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%In0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In1 = OpVariable %_ptr_Input_float Input
+%In2 = OpVariable %_ptr_Input_float Input
+%main = OpFunction %void None %10
+%29 = OpLabel
+OpBranch %30
+%30 = OpLabel
+%31 = OpPhi %v2float %16 %29 %32 %33
+%34 = OpPhi %int %int_0 %29 %35 %33
+OpLoopMerge %36 %33 None
+OpBranch %37
+%37 = OpLabel
+%38 = OpAccessChain %_ptr_Uniform_int %_ %int_0
+%39 = OpLoad %int %38
+%40 = OpSLessThan %bool %34 %39
+OpBranchConditional %40 %41 %36
+%41 = OpLabel
+%42 = OpCompositeExtract %float %31 0
+%43 = OpFAdd %float %42 %float_1
+%44 = OpCompositeInsert %v2float %43 %31 0
+%45 = OpCompositeExtract %float %44 1
+%46 = OpFMul %float %45 %float_0_9
+%32 = OpCompositeInsert %v2float %46 %44 1
+OpBranch %33
+%33 = OpLabel
+%35 = OpIAdd %int %34 %int_1
+OpBranch %30
+%36 = OpLabel
+%47 = OpCompositeExtract %float %31 0
+%48 = OpCompositeConstruct %v4float %47 %47 %47 %47
+OpStore %OutColor %48
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<opt::InsertExtractElimPass>(assembly, assembly, true,
+                                                    true);
+}
+
  // TODO(greg-lunarg): Add tests to verify handling of these cases:
  //
author	Greg Fischer <greg@lunarg.com>
	Thu, 11 Jan 2018 23:23:58 +0000 (16:23 -0700)
committer	Steven Perron <stevenperron@google.com>
	Thu, 25 Jan 2018 21:07:21 +0000 (16:07 -0500)
source/opt/insert_extract_elim.cpp		patch \| blob \| history
source/opt/insert_extract_elim.h		patch \| blob \| history
test/opt/insert_extract_elim_test.cpp		patch \| blob \| history