From 85a2146c155953d5bdfb2e7e6ba9780fc2dab1b9 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 27 Oct 2019 15:26:46 -0400 Subject: [PATCH] [SDAG] fold insert_vector_elt with undef index Similar to: rG4c47617627fb This makes the DAG behavior consistent with IR's insertelement. https://bugs.llvm.org/show_bug.cgi?id=42689 This also moves the existing fold for an undef inserted element from DAGCombiner into SelectionDAG::getNode(). I've tried to maintain test intent for AArch64 and WebAssembly by replacing undef operands with something else: the AArch64 test now uses a vector argument instead of inserting undef scalars, and the WebAssembly tests use a constant index of 0 instead of an undef index. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 -- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 ++++ .../test/CodeGen/AArch64/sched-past-vector-ldst.ll | 24 +++++------ llvm/test/CodeGen/WebAssembly/simd.ll | 48 +++++++++++----------- llvm/test/CodeGen/X86/insertelement-var-index.ll | 16 ++++++++ 5 files changed, 60 insertions(+), 41 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f00e332..4594793 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16574,10 +16574,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue EltNo = N->getOperand(2); SDLoc DL(N); - // If the inserted element is an UNDEF, just use the input vector. - if (InVal.isUndef()) - return InVec; - EVT VT = InVec.getValueType(); unsigned NumElts = VT.getVectorNumElements(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8ea7ce9..4ffc55c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5506,6 +5506,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) return getUNDEF(VT); + + // Undefined index can be assumed out-of-bounds, so that's UNDEF too. 
+ if (N3.isUndef()) + return getUNDEF(VT); + + // If the inserted element is an UNDEF, just use the input vector. + if (N2.isUndef()) + return N1; + break; } case ISD::INSERT_SUBVECTOR: { diff --git a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll index 5233346..4b42500 100644 --- a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll @@ -7,6 +7,8 @@ ; CHECK: fmla +; CHECK-NEXT: mov +; CHECK-NEXT: mov ; CHECK-NEXT: fmla ; CHECK-NEXT: fmla ; CHECK-NEXT: fmla @@ -16,21 +18,17 @@ target triple = "aarch64--linux-gnu" %Struct = type { i64*, [9 x double], [16 x {float, float}], [16 x {float, float}], i32, i32 } ; Function Attrs: nounwind -define linkonce_odr void @func(%Struct* nocapture %this) unnamed_addr #0 align 2 { +define linkonce_odr void @func(%Struct* nocapture %this, <4 x float> %f) unnamed_addr #0 align 2 { entry: - %0 = insertelement <4 x float> undef, float undef, i32 0 - %1 = insertelement <4 x float> %0, float undef, i32 1 - %2 = insertelement <4 x float> %1, float undef, i32 2 - %3 = insertelement <4 x float> %2, float undef, i32 3 %scevgep = getelementptr %Struct, %Struct* %this, i64 0, i32 2, i64 8, i32 0 %struct_ptr = bitcast float* %scevgep to i8* %vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr) %ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1 - %fm1 = fmul <4 x float> %0, %ev1 - %av1 = fadd <4 x float> %1, %fm1 + %fm1 = fmul <4 x float> %f, %ev1 + %av1 = fadd <4 x float> %f, %fm1 %ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0 - %fm2 = fmul <4 x float> %2, %ev2 - %av2 = fadd <4 x float> %3, %fm2 + %fm2 = fmul <4 x float> %f, %ev2 + %av2 = fadd <4 x float> %f, %fm2 %scevgep2 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 8, i32 0 %struct_ptr2 = bitcast float* %scevgep2 to i8* tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av2, <4 x float> %av1, 
i8* %struct_ptr2) @@ -38,11 +36,11 @@ entry: %struct_ptr3 = bitcast float* %scevgep3 to i8* %vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr3) %ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1 - %fm3 = fmul <4 x float> %0, %ev3 - %av3 = fadd <4 x float> %1, %fm3 + %fm3 = fmul <4 x float> %f, %ev3 + %av3 = fadd <4 x float> %f, %fm3 %ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0 - %fm4 = fmul <4 x float> %2, %ev4 - %av4 = fadd <4 x float> %3, %fm4 + %fm4 = fmul <4 x float> %f, %ev4 + %av4 = fadd <4 x float> %f, %fm4 %scevgep4 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 12, i32 0 %struct_ptr4 = bitcast float* %scevgep4 to i8* tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av4, <4 x float> %av3, i8* %struct_ptr4) diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll index e13646c..b2d0638 100644 --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -193,13 +193,13 @@ define <16 x i8> @replace_var_v16i8(<16 x i8> %v, i32 %i, i8 %x) { ret <16 x i8> %res } -; CHECK-LABEL: replace_undef_v16i8: +; CHECK-LABEL: replace_zero_v16i8: ; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype replace_undef_v16i8 (v128, i32) -> (v128){{$}} +; SIMD128-NEXT: .functype replace_zero_v16i8 (v128, i32) -> (v128){{$}} ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <16 x i8> @replace_undef_v16i8(<16 x i8> %v, i8 %x) { - %res = insertelement <16 x i8> %v, i8 %x, i32 undef +define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) { + %res = insertelement <16 x i8> %v, i8 %x, i32 0 ret <16 x i8> %res } @@ -464,13 +464,13 @@ define <8 x i16> @replace_var_v8i16(<8 x i16> %v, i32 %i, i16 %x) { ret <8 x i16> %res } -; CHECK-LABEL: replace_undef_v8i16: +; CHECK-LABEL: replace_zero_v8i16: ; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype replace_undef_v8i16 (v128, 
i32) -> (v128){{$}} +; SIMD128-NEXT: .functype replace_zero_v8i16 (v128, i32) -> (v128){{$}} ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <8 x i16> @replace_undef_v8i16(<8 x i16> %v, i16 %x) { - %res = insertelement <8 x i16> %v, i16 %x, i32 undef +define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) { + %res = insertelement <8 x i16> %v, i16 %x, i32 0 ret <8 x i16> %res } @@ -625,13 +625,13 @@ define <4 x i32> @replace_var_v4i32(<4 x i32> %v, i32 %i, i32 %x) { ret <4 x i32> %res } -; CHECK-LABEL: replace_undef_v4i32: +; CHECK-LABEL: replace_zero_v4i32: ; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype replace_undef_v4i32 (v128, i32) -> (v128){{$}} +; SIMD128-NEXT: .functype replace_zero_v4i32 (v128, i32) -> (v128){{$}} ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <4 x i32> @replace_undef_v4i32(<4 x i32> %v, i32 %x) { - %res = insertelement <4 x i32> %v, i32 %x, i32 undef +define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) { + %res = insertelement <4 x i32> %v, i32 %x, i32 0 ret <4 x i32> %res } @@ -781,14 +781,14 @@ define <2 x i64> @replace_var_v2i64(<2 x i64> %v, i32 %i, i64 %x) { ret <2 x i64> %res } -; CHECK-LABEL: replace_undef_v2i64: +; CHECK-LABEL: replace_zero_v2i64: ; NO-SIMD128-NOT: i64x2 ; SIMD128-VM-NOT: i64x2 -; SIMD128-NEXT: .functype replace_undef_v2i64 (v128, i64) -> (v128){{$}} +; SIMD128-NEXT: .functype replace_zero_v2i64 (v128, i64) -> (v128){{$}} ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <2 x i64> @replace_undef_v2i64(<2 x i64> %v, i64 %x) { - %res = insertelement <2 x i64> %v, i64 %x, i32 undef +define <2 x i64> @replace_zero_v2i64(<2 x i64> %v, i64 %x) { + %res = insertelement <2 x i64> %v, i64 %x, i32 0 ret <2 x i64> %res } @@ -931,13 +931,13 @@ define <4 x float> @replace_var_v4f32(<4 x float> %v, i32 %i, float 
%x) { ret <4 x float> %res } -; CHECK-LABEL: replace_undef_v4f32: +; CHECK-LABEL: replace_zero_v4f32: ; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype replace_undef_v4f32 (v128, f32) -> (v128){{$}} +; SIMD128-NEXT: .functype replace_zero_v4f32 (v128, f32) -> (v128){{$}} ; SIMD128-NEXT: f32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <4 x float> @replace_undef_v4f32(<4 x float> %v, float %x) { - %res = insertelement <4 x float> %v, float %x, i32 undef +define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) { + %res = insertelement <4 x float> %v, float %x, i32 0 ret <4 x float> %res } @@ -1086,14 +1086,14 @@ define <2 x double> @replace_var_v2f64(<2 x double> %v, i32 %i, double %x) { ret <2 x double> %res } -; CHECK-LABEL: replace_undef_v2f64: +; CHECK-LABEL: replace_zero_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-VM-NOT: f64x2 -; SIMD128-NEXT: .functype replace_undef_v2f64 (v128, f64) -> (v128){{$}} +; SIMD128-NEXT: .functype replace_zero_v2f64 (v128, f64) -> (v128){{$}} ; SIMD128-NEXT: f64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <2 x double> @replace_undef_v2f64(<2 x double> %v, double %x) { - %res = insertelement <2 x double> %v, double %x, i32 undef +define <2 x double> @replace_zero_v2f64(<2 x double> %v, double %x) { + %res = insertelement <2 x double> %v, double %x, i32 0 ret <2 x double> %res } diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll index cbb2920..c6ab2cd 100644 --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -3,6 +3,22 @@ ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2 +define <16 x i8> @undef_index(i8 %x) nounwind { +; ALL-LABEL: undef_index: +; ALL: # %bb.0: +; ALL-NEXT: 
retq + %ins = insertelement <16 x i8> undef, i8 %x, i64 undef + ret <16 x i8> %ins +} + +define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind { +; ALL-LABEL: undef_scalar: +; ALL: # %bb.0: +; ALL-NEXT: retq + %ins = insertelement <16 x i8> %x, i8 undef, i32 %index + ret <16 x i8> %ins +} + define <16 x i8> @arg_i8_v16i8(i8 %x, i32 %y) nounwind { ; SSE-LABEL: arg_i8_v16i8: ; SSE: # %bb.0: -- 2.7.4