return FPOpcode;
}
-/// If both input operands of a logic op are being cast from floating point
-/// types, try to convert this into a floating point logic node to avoid
-/// unnecessary moves from SSE to integer registers.
+/// If both input operands of a logic op are being cast from floating-point
+/// types or FP compares, try to convert this into a floating-point logic node
+/// to avoid unnecessary moves from SSE to integer registers.
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- if (N0.getOpcode() != ISD::BITCAST || N1.getOpcode() != ISD::BITCAST)
- return SDValue();
-
- if (DCI.isBeforeLegalizeOps())
+ if (!((N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) ||
+ (N0.getOpcode() == ISD::SETCC && N1.getOpcode() == ISD::SETCC)))
return SDValue();
SDValue N00 = N0.getOperand(0);
(Subtarget.hasFP16() && N00Type == MVT::f16)))
return SDValue();
- unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());
- SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
- return DAG.getBitcast(VT, FPLogic);
+ if (N0.getOpcode() == ISD::BITCAST && !DCI.isBeforeLegalizeOps()) {
+ unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());
+ SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
+ return DAG.getBitcast(VT, FPLogic);
+ }
+
+ // The vector ISA for FP predicates is incomplete before AVX, so converting
+ // COMIS* to CMPS* may not be a win before AVX.
+ // TODO: Check types/predicates to see if they are available with SSE/SSE2.
+ if (!Subtarget.hasAVX() || VT != MVT::i1 || N0.getOpcode() != ISD::SETCC ||
+ !N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ // Convert scalar FP compares and logic to vector compares (COMIS* to CMPS*)
+ // and vector logic:
+ // logic (setcc N00, N01), (setcc N10, N11) -->
+ //   extelt (logic (setcc (s2v N00), (s2v N01)), (setcc (s2v N10), (s2v N11))), 0
+ unsigned NumElts = 128 / N00Type.getSizeInBits();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), N00Type, NumElts);
+ EVT BoolVecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
+ SDValue ZeroIndex = DAG.getVectorIdxConstant(0, DL);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N11 = N1.getOperand(1);
+ SDValue Vec00 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N00);
+ SDValue Vec01 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N01);
+ SDValue Vec10 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N10);
+ SDValue Vec11 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N11);
+ SDValue Setcc0 = DAG.getSetCC(DL, BoolVecVT, Vec00, Vec01,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ SDValue Setcc1 = DAG.getSetCC(DL, BoolVecVT, Vec10, Vec11,
+ cast<CondCodeSDNode>(N1.getOperand(2))->get());
+ SDValue Logic = DAG.getNode(N->getOpcode(), DL, BoolVecVT, Setcc0, Setcc1);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Logic, ZeroIndex);
}
// Attempt to fold BITOP(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(BITOP(X,Y))
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=SSE2
-; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX
-; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512
define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: olt_ole_and_f32:
; SSE2-NEXT: andb %cl, %al
; SSE2-NEXT: retq
;
-; AVX-LABEL: olt_ole_and_f32:
-; AVX: # %bb.0:
-; AVX-NEXT: vucomiss %xmm0, %xmm1
-; AVX-NEXT: seta %cl
-; AVX-NEXT: vucomiss %xmm2, %xmm3
-; AVX-NEXT: setae %al
-; AVX-NEXT: andb %cl, %al
-; AVX-NEXT: retq
+; AVX1-LABEL: olt_ole_and_f32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmpleps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: # kill: def $al killed $al killed $eax
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: olt_ole_and_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vcmpltps %zmm1, %zmm0, %k1
+; AVX512-NEXT: vcmpleps %zmm3, %zmm2, %k0 {%k1}
+; AVX512-NEXT: kmovw %k0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%f1 = fcmp olt float %w, %x
%f2 = fcmp ole float %y, %z
%r = and i1 %f1, %f2
; SSE2-NEXT: orb %cl, %al
; SSE2-NEXT: retq
;
-; AVX-LABEL: oge_oeq_or_f32:
-; AVX: # %bb.0:
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: setae %cl
-; AVX-NEXT: vucomiss %xmm3, %xmm2
-; AVX-NEXT: setnp %dl
-; AVX-NEXT: sete %al
-; AVX-NEXT: andb %dl, %al
-; AVX-NEXT: orb %cl, %al
-; AVX-NEXT: retq
+; AVX1-LABEL: oge_oeq_or_f32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmpeqps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vorps %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: # kill: def $al killed $al killed $eax
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: oge_oeq_or_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vcmpeqps %zmm3, %zmm2, %k0
+; AVX512-NEXT: vcmpleps %zmm0, %zmm1, %k1
+; AVX512-NEXT: korw %k0, %k1, %k0
+; AVX512-NEXT: kmovw %k0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%f1 = fcmp oge float %w, %x
%f2 = fcmp oeq float %y, %z
%r = or i1 %f1, %f2
; SSE2-NEXT: xorb %cl, %al
; SSE2-NEXT: retq
;
-; AVX-LABEL: ord_one_xor_f32:
-; AVX: # %bb.0:
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: vucomiss %xmm3, %xmm2
-; AVX-NEXT: setne %al
-; AVX-NEXT: xorb %cl, %al
-; AVX-NEXT: retq
+; AVX1-LABEL: ord_one_xor_f32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmpneq_oqps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: # kill: def $al killed $al killed $eax
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: ord_one_xor_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vcmpneq_oqps %zmm3, %zmm2, %k0
+; AVX512-NEXT: vcmpordps %zmm1, %zmm0, %k1
+; AVX512-NEXT: kxorw %k0, %k1, %k0
+; AVX512-NEXT: kmovw %k0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%f1 = fcmp ord float %w, %x
%f2 = fcmp one float %y, %z
%r = xor i1 %f1, %f2
; SSE2-NEXT: andb %cl, %al
; SSE2-NEXT: retq
;
-; AVX-LABEL: une_ugt_and_f64:
-; AVX: # %bb.0:
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setp %al
-; AVX-NEXT: setne %cl
-; AVX-NEXT: orb %al, %cl
-; AVX-NEXT: vucomisd %xmm2, %xmm3
-; AVX-NEXT: setb %al
-; AVX-NEXT: andb %cl, %al
-; AVX-NEXT: retq
+; AVX1-LABEL: une_ugt_and_f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmpnlepd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: # kill: def $al killed $al killed $eax
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: une_ugt_and_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vcmpneqpd %zmm1, %zmm0, %k1
+; AVX512-NEXT: vcmpnlepd %zmm3, %zmm2, %k0 {%k1}
+; AVX512-NEXT: kmovw %k0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%f1 = fcmp une double %w, %x
%f2 = fcmp ugt double %y, %z
%r = and i1 %f1, %f2
; SSE2-NEXT: orb %cl, %al
; SSE2-NEXT: retq
;
-; AVX-LABEL: ult_uge_or_f64:
-; AVX: # %bb.0:
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setb %cl
-; AVX-NEXT: vucomisd %xmm2, %xmm3
-; AVX-NEXT: setbe %al
-; AVX-NEXT: orb %cl, %al
-; AVX-NEXT: retq
+; AVX1-LABEL: ult_uge_or_f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmpnltpd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vorpd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: # kill: def $al killed $al killed $eax
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: ult_uge_or_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vcmpnltpd %zmm3, %zmm2, %k0
+; AVX512-NEXT: vcmpnlepd %zmm0, %zmm1, %k1
+; AVX512-NEXT: korw %k0, %k1, %k0
+; AVX512-NEXT: kmovw %k0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%f1 = fcmp ult double %w, %x
%f2 = fcmp uge double %y, %z
%r = or i1 %f1, %f2
; SSE2-NEXT: xorb %cl, %al
; SSE2-NEXT: retq
;
-; AVX-LABEL: une_uno_xor_f64:
-; AVX: # %bb.0:
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setp %al
-; AVX-NEXT: setne %cl
-; AVX-NEXT: orb %al, %cl
-; AVX-NEXT: vucomisd %xmm3, %xmm2
-; AVX-NEXT: setp %al
-; AVX-NEXT: xorb %cl, %al
-; AVX-NEXT: retq
+; AVX1-LABEL: une_uno_xor_f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmpunordpd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: # kill: def $al killed $al killed $eax
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: une_uno_xor_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vcmpunordpd %zmm3, %zmm2, %k0
+; AVX512-NEXT: vcmpneqpd %zmm1, %zmm0, %k1
+; AVX512-NEXT: kxorw %k0, %k1, %k0
+; AVX512-NEXT: kmovw %k0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%f1 = fcmp une double %w, %x
%f2 = fcmp uno double %y, %z
%r = xor i1 %f1, %f2
ret i1 %r
}
+; This uses ucomis because the types do not match.
+; TODO: Merge down to narrow type?
+
define i1 @olt_olt_and_f32_f64(float %w, float %x, double %y, double %z) {
; SSE2-LABEL: olt_olt_and_f32_f64:
; SSE2: # %bb.0:
ret i1 %r
}
+; This uses ucomis because of extra uses.
+
define i1 @une_uno_xor_f64_use1(double %w, double %x, double %y, double %z, i1* %p) {
; SSE2-LABEL: une_uno_xor_f64_use1:
; SSE2: # %bb.0:
ret i1 %r
}
+; This uses ucomis because of extra uses.
+
define i1 @une_uno_xor_f64_use2(double %w, double %x, double %y, double %z, i1* %p) {
; SSE2-LABEL: une_uno_xor_f64_use2:
; SSE2: # %bb.0: