Fix two bugs in the pipeliner in renaming phis in the prolog and epilog

author Krzysztof Parzyszek <kparzysz@codeaurora.org>

Thu, 22 Dec 2016 18:49:55 +0000 (18:49 +0000)

committer Krzysztof Parzyszek <kparzysz@codeaurora.org>

Thu, 22 Dec 2016 18:49:55 +0000 (18:49 +0000)
author Krzysztof Parzyszek <kparzysz@codeaurora.org>
Thu, 22 Dec 2016 18:49:55 +0000 (18:49 +0000)
committer Krzysztof Parzyszek <kparzysz@codeaurora.org>
Thu, 22 Dec 2016 18:49:55 +0000 (18:49 +0000)
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp

index c908681..d9528cd 100644 (file)
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -2676,7 +2676,7 @@ void SwingSchedulerDAG::generateExistingPhis(
                replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
              continue;
            }
-        } else if (StageDiff > 0 &&
+        } else if (InKernel && StageDiff > 0 &&
                     VRMap[CurStageNum - StageDiff - np].count(LoopVal))
            PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
        }
@@ -3216,7 +3216,7 @@ unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
        // The previous name is defined in the current stage when the instruction
        // order is swapped.
        PrevVal = VRMap[StageNum][LoopVal];
-    else if (!LoopInst->isPHI())
+    else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
        // The loop value hasn't yet been scheduled.
        PrevVal = LoopVal;
      else if (StageNum == PhiStage + 1)
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi10.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi10.ll

new file mode 100644 (file)

index 0000000..ff35e0b
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi10.ll
@@ -0,0 +1,88 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s
+; REQUIRES: asserts
+
+define void @test(i8* noalias nocapture readonly %src, i32 %srcStride) local_unnamed_addr #0 {
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %src, i32 %srcStride
+  %add.ptr2 = getelementptr inbounds i8, i8* %add.ptr, i32 %srcStride
+  %add.ptr3 = getelementptr inbounds i8, i8* %add.ptr2, i32 %srcStride
+  br label %for.body9.epil
+
+for.body9.epil:
+  %inc.sink385.epil = phi i32 [ %add17.epil, %for.body9.epil ], [ 2, %entry ]
+  %sr.epil = phi i8 [ %0, %for.body9.epil ], [ undef, %entry ]
+  %sr431.epil = phi i8 [ %2, %for.body9.epil ], [ 0, %entry ]
+  %sr432.epil = phi i8 [ %sr431.epil, %for.body9.epil ], [ 0, %entry ]
+  %epil.iter = phi i32 [ %epil.iter.sub, %for.body9.epil ], [ undef, %entry ]
+  %sub11.epil = add i32 %inc.sink385.epil, -1
+  %add17.epil = add nuw i32 %inc.sink385.epil, 1
+  %conv19.epil = zext i8 %sr.epil to i32
+  %add21.epil = add i32 %inc.sink385.epil, 2
+  %arrayidx22.epil = getelementptr inbounds i8, i8* %src, i32 %add21.epil
+  %0 = load i8, i8* %arrayidx22.epil, align 1
+  %conv23.epil = zext i8 %0 to i32
+  %1 = load i8, i8* undef, align 1
+  %conv42.epil = zext i8 %1 to i32
+  %conv53.epil = zext i8 %sr432.epil to i32
+  %2 = load i8, i8* undef, align 1
+  %conv61.epil = zext i8 %2 to i32
+  %3 = load i8, i8* undef, align 1
+  %conv65.epil = zext i8 %3 to i32
+  %4 = load i8, i8* null, align 1
+  %conv69.epil = zext i8 %4 to i32
+  %5 = load i8, i8* undef, align 1
+  %conv72.epil = zext i8 %5 to i32
+  %6 = load i8, i8* undef, align 1
+  %conv76.epil = zext i8 %6 to i32
+  %7 = load i8, i8* undef, align 1
+  %conv80.epil = zext i8 %7 to i32
+  %8 = load i8, i8* undef, align 1
+  %conv84.epil = zext i8 %8 to i32
+  %9 = load i8, i8* undef, align 1
+  %conv88.epil = zext i8 %9 to i32
+  %10 = load i8, i8* undef, align 1
+  %conv91.epil = zext i8 %10 to i32
+  %11 = load i8, i8* undef, align 1
+  %conv95.epil = zext i8 %11 to i32
+  %12 = load i8, i8* undef, align 1
+  %conv99.epil = zext i8 %12 to i32
+  %add.epil = add nuw nsw i32 0, %conv19.epil
+  %add16.epil = add nuw nsw i32 %add.epil, 0
+  %add20.epil = add nuw nsw i32 %add16.epil, 0
+  %add24.epil = add nuw nsw i32 %add20.epil, 0
+  %add28.epil = add nuw nsw i32 %add24.epil, 0
+  %add32.epil = add nuw nsw i32 %add28.epil, 0
+  %add35.epil = add i32 %add32.epil, 0
+  %add39.epil = add i32 %add35.epil, 0
+  %add43.epil = add i32 %add39.epil, %conv53.epil
+  %add47.epil = add i32 %add43.epil, 0
+  %add51.epil = add i32 %add47.epil, 0
+  %add54.epil = add i32 %add51.epil, %conv23.epil
+  %add58.epil = add i32 %add54.epil, %conv42.epil
+  %add62.epil = add i32 %add58.epil, %conv61.epil
+  %add66.epil = add i32 %add62.epil, %conv65.epil
+  %add70.epil = add i32 %add66.epil, %conv69.epil
+  %add73.epil = add i32 %add70.epil, %conv72.epil
+  %add77.epil = add i32 %add73.epil, %conv76.epil
+  %add81.epil = add i32 %add77.epil, %conv80.epil
+  %add85.epil = add i32 %add81.epil, %conv84.epil
+  %add89.epil = add i32 %add85.epil, %conv88.epil
+  %add92.epil = add i32 %add89.epil, %conv91.epil
+  %add96.epil = add i32 %add92.epil, %conv95.epil
+  %add100.epil = add i32 %add96.epil, %conv99.epil
+  %mul.epil = mul nsw i32 %add100.epil, 2621
+  %add101.epil = add nsw i32 %mul.epil, 32768
+  %shr369.epil = lshr i32 %add101.epil, 16
+  %conv102.epil = trunc i32 %shr369.epil to i8
+  %arrayidx103.epil = getelementptr inbounds i8, i8* undef, i32 %inc.sink385.epil
+  store i8 %conv102.epil, i8* %arrayidx103.epil, align 1
+  %epil.iter.sub = add i32 %epil.iter, -1
+  %epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0
+  br i1 %epil.iter.cmp, label %for.end, label %for.body9.epil
+
+for.end:
+  unreachable
+}
+
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/llvm/test/CodeGen/Hexagon/swp-prolog-phi4.ll b/llvm/test/CodeGen/Hexagon/swp-prolog-phi4.ll

new file mode 100644 (file)

index 0000000..5ed0514
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-prolog-phi4.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -verify-machineinstrs < %s
+
+; Test that the name rewriter code doesn't chase the Phi operands for
+; Phis that do not occur in the loop that is being pipelined.
+
+define void @test(i32 %srcStride) local_unnamed_addr #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %add.ptr3.pn = phi i8* [ undef, %entry ], [ %src4.0394, %for.end ]
+  %src2.0390 = phi i8* [ undef, %entry ], [ %add.ptr3.pn, %for.end ]
+  %src4.0394 = getelementptr inbounds i8, i8* %add.ptr3.pn, i32 %srcStride
+  %sri414 = load i8, i8* undef, align 1
+  br i1 undef, label %for.body9.epil, label %for.body9.preheader.new
+
+for.body9.preheader.new:
+  br label %for.body9.epil
+
+for.body9.epil:
+  %inc.sink385.epil = phi i32 [ %add17.epil, %for.body9.epil ], [ 2, %for.body ], [ undef, %for.body9.preheader.new ]
+  %sr420.epil = phi i8 [ undef, %for.body9.epil ], [ %sri414, %for.body ], [ undef, %for.body9.preheader.new ]
+  %sr421.epil = phi i8 [ %sr420.epil, %for.body9.epil ], [ undef, %for.body ], [ undef, %for.body9.preheader.new ]
+  %sr422.epil = phi i8 [ %sr421.epil, %for.body9.epil ], [ 0, %for.body ], [ undef, %for.body9.preheader.new ]
+  %epil.iter = phi i32 [ %epil.iter.sub, %for.body9.epil ], [ undef, %for.body9.preheader.new ], [ undef, %for.body ]
+  %add17.epil = add nuw i32 %inc.sink385.epil, 1
+  %add21.epil = add i32 %inc.sink385.epil, 2
+  %arrayidx22.epil = getelementptr inbounds i8, i8* undef, i32 %add21.epil
+  %conv27.epil = zext i8 %sr422.epil to i32
+  %0 = load i8, i8* null, align 1
+  %conv61.epil = zext i8 %0 to i32
+  %arrayidx94.epil = getelementptr inbounds i8, i8* %src4.0394, i32 %add17.epil
+  %1 = load i8, i8* %arrayidx94.epil, align 1
+  %add35.epil = add i32 0, %conv27.epil
+  %add39.epil = add i32 %add35.epil, 0
+  %add43.epil = add i32 %add39.epil, 0
+  %add47.epil = add i32 %add43.epil, 0
+  %add51.epil = add i32 %add47.epil, 0
+  %add54.epil = add i32 %add51.epil, 0
+  %add58.epil = add i32 %add54.epil, 0
+  %add62.epil = add i32 %add58.epil, %conv61.epil
+  %add66.epil = add i32 %add62.epil, 0
+  %add70.epil = add i32 %add66.epil, 0
+  %add73.epil = add i32 %add70.epil, 0
+  %add77.epil = add i32 %add73.epil, 0
+  %add81.epil = add i32 %add77.epil, 0
+  %add85.epil = add i32 %add81.epil, 0
+  %add89.epil = add i32 %add85.epil, 0
+  %add92.epil = add i32 %add89.epil, 0
+  %add96.epil = add i32 %add92.epil, 0
+  %add100.epil = add i32 %add96.epil, 0
+  %mul.epil = mul nsw i32 %add100.epil, 2621
+  %add101.epil = add nsw i32 %mul.epil, 32768
+  %shr369.epil = lshr i32 %add101.epil, 16
+  %conv102.epil = trunc i32 %shr369.epil to i8
+  store i8 %conv102.epil, i8* undef, align 1
+  %epil.iter.sub = add i32 %epil.iter, -1
+  %epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0
+  br i1 %epil.iter.cmp, label %for.end, label %for.body9.epil
+
+for.end:
+  br label %for.body
+}
+
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "unsafe-fp-math"="false" "use-soft-float"="false" }
author	Krzysztof Parzyszek <kparzysz@codeaurora.org>
	Thu, 22 Dec 2016 18:49:55 +0000 (18:49 +0000)
committer	Krzysztof Parzyszek <kparzysz@codeaurora.org>
	Thu, 22 Dec 2016 18:49:55 +0000 (18:49 +0000)
llvm/lib/CodeGen/MachinePipeliner.cpp		patch \| blob \| history
llvm/test/CodeGen/Hexagon/swp-epilog-phi10.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/Hexagon/swp-prolog-phi4.ll	[new file with mode: 0644]	patch \| blob