From d0742ac2e531e3dc38ca22b200059cfaef85d838 Mon Sep 17 00:00:00 2001 From: "Wang, Xin10" Date: Tue, 10 Jan 2023 06:02:49 -0800 Subject: [PATCH] [X86][CodeGen] Fix extract f16 from big vectors When using llc with -mattr=+avx512fp16, the following input crashes. ``` define half @test(<64 x half> %x, i64 %idx){ %res = extractelement <64 x half> %x, i64 %idx ret half %res } ``` The root cause is that when we enable avx512fp16 we lose the custom handler for extracting an f16 element from big vectors that are not loaded from a pointer. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D141348 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 17 +++++++- llvm/test/CodeGen/X86/avx512fp16-mov.ll | 76 +++++++++++++++++++++++++++++++-- 2 files changed, 88 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 58d8da1..bed3e94 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34113,7 +34113,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(V); return; } - case ISD::BITREVERSE: + case ISD::BITREVERSE: { assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); assert(Subtarget.hasXOP() && "Expected XOP"); // We can use VPPERM by copying to a vector register and back. 
We'll need @@ -34121,6 +34121,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG)); return; } + case ISD::EXTRACT_VECTOR_ELT: { + // f16 = extract vXf16 %vec, i64 %idx + assert(N->getSimpleValueType(0) == MVT::f16 && + "Unexpected Value type of EXTRACT_VECTOR_ELT!"); + assert(Subtarget.hasFP16() && "Expected FP16"); + SDValue VecOp = N->getOperand(0); + EVT ExtVT = VecOp.getValueType().changeVectorElementTypeToInteger(); + SDValue Split = DAG.getBitcast(ExtVT, N->getOperand(0)); + Split = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Split, + N->getOperand(1)); + Split = DAG.getBitcast(MVT::f16, Split); + Results.push_back(Split); + return; + } + } } const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll index 09706f0..9cdb47b 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll @@ -1349,6 +1349,74 @@ define half @extract_f16_7(<8 x half> %x) { ret half %res } +define half @extract_f16_8(<32 x half> %x, i64 %idx) nounwind { +; X64-LABEL: extract_f16_8: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: movq %rsp, %rbp +; X64-NEXT: andq $-64, %rsp +; X64-NEXT: subq $128, %rsp +; X64-NEXT: andl $31, %edi +; X64-NEXT: vmovaps %zmm0, (%rsp) +; X64-NEXT: vmovsh (%rsp,%rdi,2), %xmm0 +; X64-NEXT: movq %rbp, %rsp +; X64-NEXT: popq %rbp +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: extract_f16_8: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-64, %esp +; X86-NEXT: subl $128, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: vmovaps %zmm0, (%esp) +; X86-NEXT: vmovsh (%esp,%eax,2), %xmm0 +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl + %res = extractelement <32 x half> %x, i64 %idx + ret half %res +} + +define 
half @extract_f16_9(<64 x half> %x, i64 %idx) nounwind { +; X64-LABEL: extract_f16_9: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: movq %rsp, %rbp +; X64-NEXT: andq $-64, %rsp +; X64-NEXT: subq $192, %rsp +; X64-NEXT: andl $63, %edi +; X64-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp) +; X64-NEXT: vmovaps %zmm0, (%rsp) +; X64-NEXT: vmovsh (%rsp,%rdi,2), %xmm0 +; X64-NEXT: movq %rbp, %rsp +; X64-NEXT: popq %rbp +; X64-NEXT: vzeroupper +; X64-NEXT: retq +; +; X86-LABEL: extract_f16_9: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-64, %esp +; X86-NEXT: subl $192, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: andl $63, %eax +; X86-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp) +; X86-NEXT: vmovaps %zmm0, (%esp) +; X86-NEXT: vmovsh (%esp,%eax,2), %xmm0 +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: vzeroupper +; X86-NEXT: retl + %res = extractelement <64 x half> %x, i64 %idx + ret half %res +} + define i16 @extract_i16_0(<8 x i16> %x) { ; CHECK-LABEL: extract_i16_0: ; CHECK: # %bb.0: @@ -1985,10 +2053,10 @@ define void @pr52560(i8 %0, <2 x i16> %1, ptr %c) nounwind { ; X64-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: vmovw %xmm0, %eax ; X64-NEXT: testw %ax, %ax -; X64-NEXT: je .LBB121_2 +; X64-NEXT: je .LBB123_2 ; X64-NEXT: # %bb.1: # %for.body.preheader ; X64-NEXT: movb $0, (%rsi) -; X64-NEXT: .LBB121_2: # %for.end +; X64-NEXT: .LBB123_2: # %for.end ; X64-NEXT: retq ; ; X86-LABEL: pr52560: @@ -2000,11 +2068,11 @@ define void @pr52560(i8 %0, <2 x i16> %1, ptr %c) nounwind { ; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: vmovw %xmm0, %eax ; X86-NEXT: testw %ax, %ax -; X86-NEXT: je .LBB121_2 +; X86-NEXT: je .LBB123_2 ; X86-NEXT: # %bb.1: # %for.body.preheader ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movb $0, (%eax) -; X86-NEXT: .LBB121_2: # %for.end +; X86-NEXT: .LBB123_2: # %for.end ; X86-NEXT: retl entry: %conv = sext i8 %0 to i16 -- 2.7.4