!0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0}
!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1}
+'``llvm.loop.vectorize.scalable.enable``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata selectively enables or disables scalable vectorization for the
+loop, and only has any effect if vectorization for the loop is already enabled.
+The first operand is the string ``llvm.loop.vectorize.scalable.enable``
+and the second operand is a bit. If the bit operand value is 1, scalable
+vectorization is enabled, whereas a value of 0 reverts to the default
+fixed-width vectorization:
+
+.. code-block:: llvm
+
+ !0 = !{!"llvm.loop.vectorize.scalable.enable", i1 0}
+ !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 1}
+
'``llvm.loop.vectorize.width``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/// Find named metadata for a loop with an integer value.
llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
+/// Find a combination of metadata ("llvm.loop.vectorize.width" and
+/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct
+/// an ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be
+/// found then None is returned.
+Optional<ElementCount>
+getOptionalElementCountLoopAttribute(Loop *TheLoop);
+
/// Create a new loop identifier for a loop created from a loop transformation.
///
/// @param OrigLoopID The loop ID of the loop before the transformation.
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
namespace llvm {
/// for example 'force', means a decision has been made. So, we need to be
/// careful NOT to add them if the user hasn't specifically asked so.
class LoopVectorizeHints {
- enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
- HK_PREDICATE };
+ enum HintKind {
+ HK_WIDTH,
+ HK_UNROLL,
+ HK_FORCE,
+ HK_ISVECTORIZED,
+ HK_PREDICATE,
+ HK_SCALABLE
+ };
/// Hint - associates name and validation with the hint value.
struct Hint {
/// Vector Predicate
Hint Predicate;
+ /// Says whether we should use fixed width or scalable vectorization.
+ Hint Scalable;
+
/// Return the loop metadata prefix.
static StringRef Prefix() { return "llvm.loop."; }
/// Dumps all the hint information.
void emitRemarkWithHints() const;
- unsigned getWidth() const { return Width.Value; }
+ /// Returns the width hint as an ElementCount whose scalable flag is taken
+ /// from the scalable hint (see isScalable()).
+ ElementCount getWidth() const {
+   const bool IsScalableVF = isScalable();
+   return ElementCount::get(Width.Value, IsScalableVF);
+ }
unsigned getInterleave() const { return Interleave.Value; }
unsigned getIsVectorized() const { return IsVectorized.Value; }
unsigned getPredicate() const { return Predicate.Value; }
return (ForceKind)Force.Value;
}
+ /// Returns true when the scalable hint holds a non-zero value.
+ bool isScalable() const {
+   return static_cast<bool>(Scalable.Value);
+ }
+
/// If hints are provided that force vectorization, use the AlwaysPrint
/// pass name to force the frontend to print the diagnostic.
const char *vectorizeAnalysisPassName() const;
// enabled by default because can be unsafe or inefficient. For example,
// reordering floating-point operations will change the way round-off
// error accumulates in the loop.
- return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
+ ElementCount EC = getWidth();
+ return getForce() == LoopVectorizeHints::FK_Enabled ||
+ EC.getKnownMinValue() > 1;
}
bool isPotentiallyUnsafe() const {
if (hasVectorizeTransformation(L) == TM_ForcedByUser) {
LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n");
- Optional<int> VectorizeWidth =
- getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<ElementCount> VectorizeWidth =
+ getOptionalElementCountLoopAttribute(L);
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
- if (VectorizeWidth.getValueOr(0) != 1)
+ if (!VectorizeWidth || VectorizeWidth->isVector())
ORE->emit(
DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
"FailedRequestedVectorization",
return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
}
+/// Build an ElementCount for \p TheLoop from its loop metadata. Returns None
+/// when "llvm.loop.vectorize.width" is not found; otherwise the width is
+/// paired with the value of "llvm.loop.vectorize.scalable.enable", which is
+/// treated as false when that metadata is missing.
+Optional<ElementCount>
+llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
+  Optional<int> WidthMD =
+      getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
+
+  // Without a width there is nothing to build an ElementCount from, so the
+  // scalable metadata is not even queried.
+  if (!WidthMD)
+    return None;
+
+  Optional<int> ScalableMD = getOptionalIntLoopAttribute(
+      TheLoop, "llvm.loop.vectorize.scalable.enable");
+  return ElementCount::get(*WidthMD, ScalableMD.getValueOr(false));
+}
+
llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
StringRef Name) {
const MDOperand *AttrMD =
if (Enable == false)
return TM_SuppressedByUser;
- Optional<int> VectorizeWidth =
- getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<ElementCount> VectorizeWidth =
+ getOptionalElementCountLoopAttribute(L);
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
// 'Forcing' vector width and interleave count to one effectively disables
// this transformation.
- if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+ if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
+ InterleaveCount == 1)
return TM_SuppressedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
if (Enable == true)
return TM_ForcedByUser;
- if (VectorizeWidth == 1 && InterleaveCount == 1)
+ if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
return TM_Disable;
- if (VectorizeWidth > 1 || InterleaveCount > 1)
+ if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
return TM_Enable;
if (hasDisableAllTransformsHint(L))
return (Val <= 1);
case HK_ISVECTORIZED:
case HK_PREDICATE:
+ case HK_SCALABLE:
return (Val == 0 || Val == 1);
}
return false;
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
- Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
+ Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
+ Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
// If the vectorization width and interleaving count are both 1 then
// consider the loop to have been already vectorized because there's
// nothing more that we can do.
- IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
+ IsVectorized.Value =
+ getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1;
LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
<< "LV: Interleaving disabled by the pass manager\n");
}
if (Force.Value == LoopVectorizeHints::FK_Enabled) {
R << " (Force=" << NV("Force", true);
if (Width.Value != 0)
- R << ", Vector Width=" << NV("VectorWidth", Width.Value);
+ R << ", Vector Width=" << NV("VectorWidth", getWidth());
if (Interleave.Value != 0)
R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
R << ")";
}
const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
- if (getWidth() == 1)
+ if (getWidth() == ElementCount::getFixed(1))
return LV_NAME;
if (getForce() == LoopVectorizeHints::FK_Disabled)
return LV_NAME;
- if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
+ if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
return LV_NAME;
return OptimizationRemarkAnalysis::AlwaysPrint;
}
return;
unsigned Val = C->getZExtValue();
- Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
+ Hint *Hints[] = {&Width, &Interleave, &Force,
+ &IsVectorized, &Predicate, &Scalable};
for (auto H : Hints) {
if (Name == H->Name) {
if (H->validate(Val))
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE);
// Get user vectorization factor.
- const unsigned UserVF = Hints.getWidth();
+ ElementCount UserVF = Hints.getWidth();
+ if (UserVF.isScalable()) {
+ // TODO: Use scalable UserVF once we've added initial support for scalable
+ // vectorization. For now we convert it to fixed width, but this will be
+ // removed in a later patch.
+ UserVF = ElementCount::getFixed(UserVF.getKnownMinValue());
+ }
// Plan how to best vectorize, return the best VF and its cost.
- const VectorizationFactor VF =
- LVP.planInVPlanNativePath(ElementCount::getFixed(UserVF));
+ const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF);
// If we are stress testing VPlan builds, do not attempt to generate vector
// code. Masked vector code generation support will follow soon.
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE);
// Get user vectorization factor and interleave count.
- unsigned UserVF = Hints.getWidth();
+ ElementCount UserVF = Hints.getWidth();
+ if (UserVF.isScalable()) {
+ // TODO: Use scalable UserVF once we've added initial support for scalable
+ // vectorization. For now we convert it to fixed width, but this will be
+ // removed in a later patch.
+ UserVF = ElementCount::getFixed(UserVF.getKnownMinValue());
+ }
+
unsigned UserIC = Hints.getInterleave();
// Plan how to best vectorize, return the best VF and its cost.
- Optional<VectorizationFactor> MaybeVF =
- LVP.plan(ElementCount::getFixed(UserVF), UserIC);
+ Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
VectorizationFactor VF = VectorizationFactor::Disabled();
unsigned IC = 1;
ret void
}
+; test2: the loop is tagged with !llvm.loop !2, which combines the width-8
+; hint (!1) with llvm.loop.vectorize.scalable.enable set to 1 (!3). The
+; directives below still expect a fixed-width <8 x i32> store.
+; CHECK-LABEL: @test2(
+; CHECK: store <8 x i32>
+; CHECK: ret void
+define void @test2(i32* nocapture %a, i32 %n) #0 {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = trunc i64 %indvars.iv to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; test3: the loop is tagged with !llvm.loop !4, which combines the width-8
+; hint (!1) with llvm.loop.vectorize.scalable.enable set to 0 (!5). The
+; directives below expect a fixed-width <8 x i32> store.
+; CHECK-LABEL: @test3(
+; CHECK: store <8 x i32>
+; CHECK: ret void
+define void @test3(i32* nocapture %a, i32 %n) #0 {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = trunc i64 %indvars.iv to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
!0 = !{!0, !1}
!1 = !{!"llvm.loop.vectorize.width", i32 8}
+; Loop IDs for test2 (!2) and test3 (!4): both reuse the width-8 hint in !1
+; and add the scalable.enable flag. The flag is written as an i1 because the
+; LangRef describes the second operand as a bit.
+!2 = !{!2, !1, !3}
+!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 1}
+!4 = !{!4, !1, !5}
+!5 = !{!"llvm.loop.vectorize.scalable.enable", i1 0}
--- /dev/null
+; RUN: opt < %s -loop-vectorize -transform-warning -S 2>&1 | FileCheck %s
+
+; Like no_array_bounds.ll we verify warnings are generated when vectorization/interleaving is
+; explicitly specified and fails to occur for both fixed and scalable vectorize.width loop hints.
+
+; #pragma clang loop vectorize(enable)
+; for (int i = 0; i < number; i++) {
+; A[B[i]]++;
+; }
+
+; foo: loop metadata !0 sets llvm.loop.vectorize.enable to true and
+; llvm.loop.vectorize.width to 1, with no scalable hint. The expected
+; diagnostic is the "loop not interleaved" warning.
+; CHECK: warning: <unknown>:0:0: loop not interleaved: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+define dso_local void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
+entry:
+ %cmp7 = icmp sgt i32 %N, 0
+ br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %N to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %idxprom1 = sext i32 %0 to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
+ %1 = load i32, i32* %arrayidx2, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; foo2: same loop body as foo, but loop metadata !3 additionally sets
+; llvm.loop.vectorize.scalable.enable to true alongside the width-1 hint.
+; The expected diagnostic is the "loop not vectorized" warning.
+; CHECK: warning: <unknown>:0:0: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+define dso_local void @foo2(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
+entry:
+ %cmp7 = icmp sgt i32 %N, 0
+ br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %N to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %idxprom1 = sext i32 %0 to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
+ %1 = load i32, i32* %arrayidx2, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !3
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = !{!"llvm.loop.vectorize.width", i32 1}
+!3 = distinct !{!3, !1, !2, !4}
+!4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}