From 3567908d8ceb95afe50961c7a953c202131235c5 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 30 Dec 2020 11:27:23 -0500
Subject: [PATCH] [SLP] add fadd reduction test to show broken FMF propagation; NFC

---
 .../Transforms/SLPVectorizer/X86/horizontal.ll | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 5663c88..8e175f1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -1766,4 +1766,39 @@ bb.1:
   ret void
 }
 
+; FIXME: This is a miscompile.
+; The FMF on the reduction should match the incoming insts.
+
+define float @fadd_v4f32_fmf(float* %p) {
+; CHECK-LABEL: @fadd_v4f32_fmf(
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret float [[TMP3]]
+;
+; STORE-LABEL: @fadd_v4f32_fmf(
+; STORE-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; STORE-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; STORE-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; STORE-NEXT:    [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; STORE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; STORE-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; STORE-NEXT:    ret float [[TMP3]]
+;
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  %p2 = getelementptr inbounds float, float* %p, i64 2
+  %p3 = getelementptr inbounds float, float* %p, i64 3
+  %t0 = load float, float* %p, align 4
+  %t1 = load float, float* %p1, align 4
+  %t2 = load float, float* %p2, align 4
+  %t3 = load float, float* %p3, align 4
+  %add1 = fadd reassoc nsz float %t1, %t0
+  %add2 = fadd reassoc nsz float %t2, %add1
+  %add3 = fadd reassoc nsz float %t3, %add2
+  ret float %add3
+}
+
 declare i32 @__gxx_personality_v0(...)
-- 
2.7.4
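
For context: the checked output above tags the vectorized reduction 'fast' even though the scalar fadds only carry 'reassoc nsz'. A minimal sketch of the reduction this test should produce once FMF propagation is fixed, assuming the call keeps only the flags present on the incoming instructions (value names here are illustrative, not taken from the generated checks):

  %v = load <4 x float>, <4 x float>* %vp, align 4
  ; flags match the scalar fadds: reassoc nsz, not fast
  %red = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %v)
  ret float %red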