Results.push_back(V);
return;
}
- case ISD::BITREVERSE:
+ case ISD::BITREVERSE: {
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
assert(Subtarget.hasXOP() && "Expected XOP");
// We can use VPPERM by copying to a vector register and back. We'll need
Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
return;
}
+ case ISD::EXTRACT_VECTOR_ELT: {
+ // f16 = extract vXf16 %vec, i64 %idx
+ // Legalize by bitcasting the source vector to the same-width integer
+ // vector, extracting the lane as i16, and bitcasting the scalar result
+ // back to f16. Only reached when FP16 is available (asserted below).
+ assert(N->getSimpleValueType(0) == MVT::f16 &&
+ "Unexpected Value type of EXTRACT_VECTOR_ELT!");
+ assert(Subtarget.hasFP16() && "Expected FP16");
+ SDValue VecOp = N->getOperand(0);
+ EVT ExtVT = VecOp.getValueType().changeVectorElementTypeToInteger();
+ // Reuse VecOp instead of re-querying N->getOperand(0).
+ SDValue Split = DAG.getBitcast(ExtVT, VecOp);
+ Split = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Split,
+ N->getOperand(1));
+ Split = DAG.getBitcast(MVT::f16, Split);
+ Results.push_back(Split);
+ return;
+ }
+ }
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
ret half %res
}
+; Variable-index f16 extract from a 512-bit vector: the vector is spilled to a
+; 64-byte-aligned stack slot, the index is masked to the lane range (andl $31),
+; and the element is loaded with vmovsh.
+define half @extract_f16_8(<32 x half> %x, i64 %idx) nounwind {
+; X64-LABEL: extract_f16_8:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: andq $-64, %rsp
+; X64-NEXT: subq $128, %rsp
+; X64-NEXT: andl $31, %edi
+; X64-NEXT: vmovaps %zmm0, (%rsp)
+; X64-NEXT: vmovsh (%rsp,%rdi,2), %xmm0
+; X64-NEXT: movq %rbp, %rsp
+; X64-NEXT: popq %rbp
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+;
+; X86-LABEL: extract_f16_8:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-64, %esp
+; X86-NEXT: subl $128, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: andl $31, %eax
+; X86-NEXT: vmovaps %zmm0, (%esp)
+; X86-NEXT: vmovsh (%esp,%eax,2), %xmm0
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+ %res = extractelement <32 x half> %x, i64 %idx
+ ret half %res
+}
+
+; Variable-index f16 extract from a 1024-bit vector (two zmm registers): both
+; halves are spilled to an aligned stack slot, the index is masked (andl $63),
+; and the element is loaded with vmovsh.
+define half @extract_f16_9(<64 x half> %x, i64 %idx) nounwind {
+; X64-LABEL: extract_f16_9:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: andq $-64, %rsp
+; X64-NEXT: subq $192, %rsp
+; X64-NEXT: andl $63, %edi
+; X64-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
+; X64-NEXT: vmovaps %zmm0, (%rsp)
+; X64-NEXT: vmovsh (%rsp,%rdi,2), %xmm0
+; X64-NEXT: movq %rbp, %rsp
+; X64-NEXT: popq %rbp
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+;
+; X86-LABEL: extract_f16_9:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-64, %esp
+; X86-NEXT: subl $192, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: andl $63, %eax
+; X86-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovaps %zmm0, (%esp)
+; X86-NEXT: vmovsh (%esp,%eax,2), %xmm0
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+ %res = extractelement <64 x half> %x, i64 %idx
+ ret half %res
+}
+
define i16 @extract_i16_0(<8 x i16> %x) {
; CHECK-LABEL: extract_i16_0:
; CHECK: # %bb.0:
; X64-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: vmovw %xmm0, %eax
; X64-NEXT: testw %ax, %ax
-; X64-NEXT: je .LBB121_2
+; X64-NEXT: je .LBB123_2
; X64-NEXT: # %bb.1: # %for.body.preheader
; X64-NEXT: movb $0, (%rsi)
-; X64-NEXT: .LBB121_2: # %for.end
+; X64-NEXT: .LBB123_2: # %for.end
; X64-NEXT: retq
;
; X86-LABEL: pr52560:
; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: vmovw %xmm0, %eax
; X86-NEXT: testw %ax, %ax
-; X86-NEXT: je .LBB121_2
+; X86-NEXT: je .LBB123_2
; X86-NEXT: # %bb.1: # %for.body.preheader
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $0, (%eax)
-; X86-NEXT: .LBB121_2: # %for.end
+; X86-NEXT: .LBB123_2: # %for.end
; X86-NEXT: retl
entry:
%conv = sext i8 %0 to i16