static const int kSpvTypePointerStorageClass = 1;
static const int kSpvTypePointerTypeId = 2;
static const int kSpvLoopMergeMergeBlockId = 0;
+static const int kSpvLoopMergeContinueTargetIdInIdx = 1;
static const int kSpvSelectionMergeMergeBlockId = 0;
namespace spvtools {
callee_result_ids.insert(rid);
});
+ // If the caller is in a single-block loop, and the callee has multiple
+ // blocks, then the normal inlining logic will place the OpLoopMerge in
+ // the last of several blocks in the loop. Instead, it should be placed
+ // at the end of the first block. First determine if the caller is in a
+ // single block loop. We'll wait to move the OpLoopMerge until the end
+ // of the regular inlining logic, and only if necessary.
+ bool caller_is_single_block_loop = false;
+ if (auto* loop_merge = call_block_itr->GetLoopMergeInst()) {
+ caller_is_single_block_loop =
+ call_block_itr->id() ==
+ loop_merge->GetSingleWordInOperand(kSpvLoopMergeContinueTargetIdInIdx);
+ }
+
+ bool callee_begins_with_structured_header =
+ (*(calleeFn->begin())).GetMergeInst() != nullptr;
+
// Clone and map callee code. Copy caller block code to beginning of
// first block and end of last block.
bool prevInstWasReturn = false;
const uint32_t calleeTypeId = calleeFn->type_id();
// new_blk_ptr is a new basic block in the caller. New instructions are
// written to it. It is created when we encounter the OpLabel
- // of the first callee block.
+ // of the first callee block. It is appended to new_blocks only when
+ // it is complete.
std::unique_ptr<ir::BasicBlock> new_blk_ptr;
calleeFn->ForEachInst([&new_blocks, &callee2caller, &call_block_itr,
&call_inst_itr, &new_blk_ptr, &prevInstWasReturn,
- &returnLabelId, &returnVarId, &calleeTypeId,
+ &returnLabelId, &returnVarId,
+ caller_is_single_block_loop,
+ callee_begins_with_structured_header, &calleeTypeId,
&multiBlocks, &postCallSB, &preCallSB, multiReturn,
&singleTripLoopHeaderId, &singleTripLoopContinueId,
- &callee_result_ids, this](
- const ir::Instruction* cpi) {
+ &callee_result_ids, this](const ir::Instruction* cpi) {
switch (cpi->opcode()) {
case SpvOpFunction:
case SpvOpFunctionParameter:
}
new_blk_ptr->AddInstruction(std::move(cp_inst));
}
- // If callee has multiple returns, insert header block for
+ if (caller_is_single_block_loop &&
+ callee_begins_with_structured_header) {
+ // We can't place both the caller's merge instruction and another
+ // merge instruction in the same block. So split the calling block.
+ // Insert an unconditional branch to a new guard block. Later,
+ // once we know the ID of the last block, we will move the caller's
+ // OpLoopMerge from the last generated block into the first block.
+ // We also wait to avoid invalidating various iterators.
+ const auto guard_block_id = this->TakeNextId();
+ AddBranch(guard_block_id, &new_blk_ptr);
+ new_blocks->push_back(std::move(new_blk_ptr));
+ // Start the next block.
+ new_blk_ptr.reset(new ir::BasicBlock(NewLabel(guard_block_id)));
+ // Reset the mapping of the callee's entry block to point to
+ // the guard block. Do this so we can fix up phis later on to
+ // satisfy dominance.
+ callee2caller[cpi->result_id()] = guard_block_id;
+ }
+ // If callee has multiple returns, insert a header block for
// single-trip loop that will encompass callee code. Start postheader
// block.
+ //
+ // Note: Consider the following combination:
+ // - the caller is a single block loop
+ // - the callee does not begin with a structure header
+ // - the callee has multiple returns.
+ // We still need to split the caller block and insert a guard block.
+ // But we only need to do it once. We haven't done it yet, but the
+ // single-trip loop header will serve the same purpose.
if (multiReturn) {
singleTripLoopHeaderId = this->TakeNextId();
AddBranch(singleTripLoopHeaderId, &new_blk_ptr);
default: {
// Copy callee instruction and remap all input Ids.
std::unique_ptr<ir::Instruction> cp_inst(new ir::Instruction(*cpi));
- cp_inst->ForEachInId([&callee2caller, &cpi, &callee_result_ids, this]
- (uint32_t* iid) {
+ cp_inst->ForEachInId([&callee2caller, &callee_result_ids,
+ this](uint32_t* iid) {
const auto mapItr = callee2caller.find(*iid);
if (mapItr != callee2caller.end()) {
*iid = mapItr->second;
} break;
}
});
+
+ if (caller_is_single_block_loop && (new_blocks->size() > 1)) {
+ // Move the OpLoopMerge from the last block back to the first, where
+ // it belongs. Also, update its continue target to point to the last
+ // block.
+ auto& first = new_blocks->front();
+ auto& last = new_blocks->back();
+ assert(first != last);
+
+ // Insert a modified copy of the loop merge into the first block.
+ auto loop_merge_itr = last->tail();
+ --loop_merge_itr;
+ assert(loop_merge_itr->opcode() == SpvOpLoopMerge);
+ std::unique_ptr<ir::Instruction> cp_inst(new ir::Instruction(*loop_merge_itr));
+ cp_inst->SetInOperand(kSpvLoopMergeContinueTargetIdInIdx, {last->id()});
+ first->tail().InsertBefore(std::move(cp_inst));
+
+ // Remove the loop merge from the last block.
+ loop_merge_itr.Erase();
+ }
+
// Update block map given replacement blocks.
for (auto& blk : *new_blocks) {
id2block_[blk->id()] = &*blk;
// done validly if the return was not in a loop in the original function.
// Also remember functions with multiple (early) returns.
AnalyzeReturns(func);
- return no_return_in_loop_.find(func->result_id()) !=
- no_return_in_loop_.cend();
+ return no_return_in_loop_.find(func->result_id()) !=
+ no_return_in_loop_.cend();
}
void InlinePass::InitializeInline(ir::Module* module) {
assembly, assembly, false, true);
}
+TEST_F(InlineTest, SingleBlockLoopCallsMultiBlockCallee) {
+ // Example from https://github.com/KhronosGroup/SPIRV-Tools/issues/787
+ //
+ // CFG structure is:
+ // foo:
+ // fooentry -> fooexit
+ //
+ // main:
+ // entry -> loop
+ // loop -> loop, merge
+ // loop calls foo()
+ // merge
+ //
+ // Since the callee has multiple blocks, it will split the calling block
+ // into at least two, resulting in a new "back-half" block that contains
+ // the instructions after the inlined function call. If the calling block
+ // has an OpLoopMerge that points back to the calling block itself, then
+ // the OpLoopMerge can't remain in the back-half block, but must be
+ // moved to the end of the original calling block, and it continue target
+ // operand updated to point to the back-half block.
+
+ const std::string predefs =
+ R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %1 "main"
+OpSource OpenCL_C 120
+%bool = OpTypeBool
+%true = OpConstantTrue %bool
+%void = OpTypeVoid
+)";
+
+ const std::string nonEntryFuncs =
+ R"(%5 = OpTypeFunction %void
+%6 = OpFunction %void None %5
+%7 = OpLabel
+OpBranch %8
+%8 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ const std::string before =
+ R"(%1 = OpFunction %void None %5
+%9 = OpLabel
+OpBranch %10
+%10 = OpLabel
+%11 = OpFunctionCall %void %6
+OpLoopMerge %12 %10 None
+OpBranchConditional %true %10 %12
+%12 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ const std::string after =
+ R"(%1 = OpFunction %void None %5
+%9 = OpLabel
+OpBranch %10
+%10 = OpLabel
+OpLoopMerge %12 %13 None
+OpBranch %13
+%13 = OpLabel
+OpBranchConditional %true %10 %12
+%12 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ SinglePassRunAndCheck<opt::InlineExhaustivePass>(
+ predefs + nonEntryFuncs + before, predefs + nonEntryFuncs + after, false,
+ true);
+}
+
+TEST_F(InlineTest, SingleBlockLoopCallsMultiBlockCalleeHavingSelectionMerge) {
+ // This is similar to SingleBlockLoopCallsMultiBlockCallee except
+ // that calleee block also has a merge instruction in its first block.
+ // That merge instruction must be an OpSelectionMerge (because the entry
+ // block of a function can't be the header of a loop since the entry
+ // block can't be the target of a branch).
+ //
+ // In this case the OpLoopMerge can't be placed in the same block as
+ // the OpSelectionMerge, so inlining must create a new block to contain
+ // the callee contents.
+ //
+ // Additionally, we have two dummy OpCopyObject instructions to prove that
+ // the OpLoopMerge is moved to the right location.
+ //
+ // Also ensure that OpPhis within the cloned callee code are valid.
+ // We need to test that the predecessor blocks are remapped correctly so that
+ // dominance rules are satisfied
+
+ const std::string predefs =
+ R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %1 "main"
+OpSource OpenCL_C 120
+%bool = OpTypeBool
+%true = OpConstantTrue %bool
+%false = OpConstantFalse %bool
+%void = OpTypeVoid
+%6 = OpTypeFunction %void
+)";
+
+ // This callee has multiple blocks, and an OpPhi in the last block
+ // that references a value from the first block. This tests that
+ // cloned block IDs are remapped appropriately. The OpPhi dominance
+ // requires that the remapped %9 must be in a block that dominates
+ // the remapped %8.
+ const std::string nonEntryFuncs =
+ R"(%7 = OpFunction %void None %6
+%8 = OpLabel
+%9 = OpCopyObject %bool %true
+OpSelectionMerge %10 None
+OpBranchConditional %true %10 %10
+%10 = OpLabel
+%11 = OpPhi %bool %9 %8
+OpReturn
+OpFunctionEnd
+)";
+
+ const std::string before =
+ R"(%1 = OpFunction %void None %6
+%12 = OpLabel
+OpBranch %13
+%13 = OpLabel
+%14 = OpCopyObject %bool %false
+%15 = OpFunctionCall %void %7
+OpLoopMerge %16 %13 None
+OpBranchConditional %true %13 %16
+%16 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ // Note the remapped Phi uses %17 as the parent instead
+ // of %13, demonstrating that the parent block has been remapped
+ // correctly.
+ const std::string after =
+ R"(%1 = OpFunction %void None %6
+%12 = OpLabel
+OpBranch %13
+%13 = OpLabel
+%14 = OpCopyObject %bool %false
+OpLoopMerge %16 %19 None
+OpBranch %17
+%17 = OpLabel
+%18 = OpCopyObject %bool %true
+OpSelectionMerge %19 None
+OpBranchConditional %true %19 %19
+%19 = OpLabel
+%20 = OpPhi %bool %18 %17
+OpBranchConditional %true %13 %16
+%16 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ SinglePassRunAndCheck<opt::InlineExhaustivePass>(
+ predefs + nonEntryFuncs + before, predefs + nonEntryFuncs + after, false,
+ true);
+}
+
+TEST_F(InlineTest, SingleBlockLoopCallsMultiBlockCalleeHavingSelectionMergeAndMultiReturns) {
+ // This is similar to SingleBlockLoopCallsMultiBlockCalleeHavingSelectionMerge
+ // except that in addition to starting with a selection header, the
+ // callee also has multi returns.
+ //
+ // So now we have to accommodate:
+ // - The caller's OpLoopMerge (which must move to the first block)
+ // - The single-trip loop to wrap the multi returns, and
+ // - The callee's selection merge in its first block.
+ // Each of these must go into their own blocks.
+
+ const std::string predefs =
+ R"(OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %1 "main"
+OpSource OpenCL_C 120
+%bool = OpTypeBool
+%int = OpTypeInt 32 1
+%true = OpConstantTrue %bool
+%false = OpConstantFalse %bool
+%int_0 = OpConstant %int 0
+%int_1 = OpConstant %int 1
+%int_2 = OpConstant %int 2
+%int_3 = OpConstant %int 3
+%int_4 = OpConstant %int 4
+%void = OpTypeVoid
+%12 = OpTypeFunction %void
+)";
+
+ const std::string nonEntryFuncs =
+ R"(%13 = OpFunction %void None %12
+%14 = OpLabel
+%15 = OpCopyObject %int %int_0
+OpReturn
+%16 = OpLabel
+%17 = OpCopyObject %int %int_1
+OpReturn
+OpFunctionEnd
+)";
+
+ const std::string before =
+ R"(%1 = OpFunction %void None %12
+%18 = OpLabel
+OpBranch %19
+%19 = OpLabel
+%20 = OpCopyObject %int %int_2
+%21 = OpFunctionCall %void %13
+%22 = OpCopyObject %int %int_3
+OpLoopMerge %23 %19 None
+OpBranchConditional %true %19 %23
+%23 = OpLabel
+%24 = OpCopyObject %int %int_4
+OpReturn
+OpFunctionEnd
+)";
+
+ const std::string after =
+ R"(%1 = OpFunction %void None %12
+%18 = OpLabel
+OpBranch %19
+%19 = OpLabel
+%20 = OpCopyObject %int %int_2
+OpLoopMerge %23 %26 None
+OpBranch %25
+%25 = OpLabel
+OpLoopMerge %26 %27 None
+OpBranch %28
+%28 = OpLabel
+%29 = OpCopyObject %int %int_0
+OpBranch %26
+%30 = OpLabel
+%31 = OpCopyObject %int %int_1
+OpBranch %26
+%27 = OpLabel
+OpBranchConditional %false %25 %26
+%26 = OpLabel
+%22 = OpCopyObject %int %int_3
+OpBranchConditional %true %19 %23
+%23 = OpLabel
+%24 = OpCopyObject %int %int_4
+OpReturn
+OpFunctionEnd
+)";
+
+ SinglePassRunAndCheck<opt::InlineExhaustivePass>(
+ predefs + nonEntryFuncs + before, predefs + nonEntryFuncs + after, false,
+ true);
+}
+
// TODO(greg-lunarg): Add tests to verify handling of these cases:
//
// Empty modules