From: Phoebe Wang Date: Sat, 14 Jan 2023 03:18:56 +0000 (+0800) Subject: [X86] Do not lower INSERT_VECTOR_ELT to vselect for vXf16 without BWI X-Git-Tag: upstream/17.0.6~20930 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=974ebe619d179756540527cd71294f86529ff9ec;p=platform%2Fupstream%2Fllvm.git [X86] Do not lower INSERT_VECTOR_ELT to vselect for vXf16 without BWI We cannot handle i8/i16/f16 vselect without BWI. Fixes #59980 Reviewed By: RKSimon, skan Differential Revision: https://reviews.llvm.org/D141668 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 49279ec..fa32b60 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20222,7 +20222,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // possible vector indices, and FP insertion has less gpr->simd traffic. if (!(Subtarget.hasBWI() || (Subtarget.hasAVX512() && EltSizeInBits >= 32) || - (Subtarget.hasSSE41() && VT.isFloatingPoint()))) + (Subtarget.hasSSE41() && (EltVT == MVT::f32 || EltVT == MVT::f64)))) return SDValue(); MVT IdxSVT = MVT::getIntegerVT(EltSizeInBits); diff --git a/llvm/test/CodeGen/X86/pr59980.ll b/llvm/test/CodeGen/X86/pr59980.ll new file mode 100644 index 0000000..1179c97 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr59980.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.15 | FileCheck %s + +%0 = type <{ half }> +%1 = type <{ <16 x half> }> + +define void @foo(ptr %0, ptr %1, ptr %2) #0 { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: andq $-32, %rsp +; CHECK-NEXT: subq $64, %rsp +; CHECK-NEXT: movl (%rdx), %eax +; CHECK-NEXT: andl $15, %eax +; CHECK-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: vmovups (%rsi), %ymm1 +; CHECK-NEXT: vmovaps %ymm1, (%rsp) +; CHECK-NEXT: vpextrw $0, %xmm0, (%rsp,%rax,2) +; CHECK-NEXT: vmovaps (%rsp), %ymm0 +; CHECK-NEXT: vmovups %ymm0, (%rsi) +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %4 = bitcast ptr %2 to ptr + %5 = load i64, ptr %4, align 8 + %6 = getelementptr inbounds %0, ptr %0, i64 0, i32 0 + %7 = load half, ptr %6, align 2 + %8 = getelementptr inbounds %1, ptr %1, i64 0, i32 0 + %9 = load <16 x half>, ptr %8, align 16 + %10 = trunc i64 %5 to i32 + %11 = insertelement <16 x half> %9, half %7, i32 %10 + store <16 x half> %11, ptr %8, align 16 + ret void +} + +attributes #0 = { nounwind "target-features"="+f16c" }