[LoopVectorize][SVE] Fix crash when vectorising FP negation

author David Sherwood <david.sherwood@arm.com>

Fri, 5 Mar 2021 17:10:09 +0000 (17:10 +0000)

committer David Sherwood <david.sherwood@arm.com>

Wed, 28 Apr 2021 14:22:35 +0000 (15:22 +0100)
author David Sherwood <david.sherwood@arm.com>
Fri, 5 Mar 2021 17:10:09 +0000 (17:10 +0000)
committer David Sherwood <david.sherwood@arm.com>
Wed, 28 Apr 2021 14:22:35 +0000 (15:22 +0100)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 2878101..2e249d7 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7549,7 +7549,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
          Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
    }
    case Instruction::FNeg: {
-    assert(!VF.isScalable() && "VF is assumed to be non scalable.");
      return TTI.getArithmeticInstrCost(
          I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue,
          TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll

index d548ca7..ad6f42b 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
@@ -14,8 +14,7 @@ define void @cmpsel_i32(i32* noalias nocapture %a, i32* noalias nocapture readon
  ; CHECK:         store <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32>* {{.*}}, align 4
  ;
  entry:
-  %cmp7 = icmp sgt i64 %n, 0
-  br i1 %cmp7, label %for.body, label %for.end
+  br label %for.body
  
  for.body:                                         ; preds = %entry, %for.body
    %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
@@ -46,8 +45,7 @@ define void @cmpsel_f32(float* noalias nocapture %a, float* noalias nocapture re
  ; CHECK:         store <vscale x 4 x float> [[TMP2]], <vscale x 4 x float>* {{.*}}, align 4
  
  entry:
-  %cmp8 = icmp sgt i64 %n, 0
-  br i1 %cmp8, label %for.body, label %for.end
+  br label %for.body
  
  for.body:                                         ; preds = %entry, %for.body
    %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
@@ -59,7 +57,33 @@ for.body:                                         ; preds = %entry, %for.body
    store float %conv, float* %arrayidx3, align 4
    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    %exitcond.not = icmp eq i64 %indvars.iv.next, %n
-  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @fneg_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: @fneg_f32(
+; CHECK-NEXT:  entry:
+; CHECK:       vector.body:
+; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* {{.*}}, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg <vscale x 4 x float> [[WIDE_LOAD]]
+; CHECK:         store <vscale x 4 x float> [[TMP1]], <vscale x 4 x float>* {{.*}}, align 4
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %fneg = fneg float %0
+  %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  store float %fneg, float* %arrayidx3, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
  
  for.end:                                          ; preds = %for.body, %entry
    ret void
@@ -71,4 +95,3 @@ for.end:                                          ; preds = %for.body, %entry
  !3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
  !4 = !{!"llvm.loop.interleave.count", i32 1}
  !5 = !{!"llvm.loop.vectorize.enable", i1 true}
-!6 = distinct !{!6, !1, !2, !3, !4, !5}
author	David Sherwood <david.sherwood@arm.com>
	Fri, 5 Mar 2021 17:10:09 +0000 (17:10 +0000)
committer	David Sherwood <david.sherwood@arm.com>
	Wed, 28 Apr 2021 14:22:35 +0000 (15:22 +0100)
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch | blob | history
llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll		patch | blob | history