ONE is currently softened to OGT | OLT. But the OGT and OLT libcalls will trigger an exception for QNAN, at least on X86 with libgcc. UEQ, on the other hand, uses UO | OEQ, and the UO and OEQ libcalls will not trigger an exception for QNAN.
This patch changes ONE to use the inverse of the UEQ lowering, so we now produce O & UNE. Technically, the existing behavior was correct for a signalling ONE, but since I don't know how to generate one of those from clang, that seemed like something we can deal with later, as we would need to fix other predicates as well. Also, removing spurious exceptions seemed better than missing an exception.
There are also problems with quiet OGT/OLT/OLE/OGE, but those are harder to fix.
Differential Revision: https://reviews.llvm.org/D72477
(VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
break;
case ISD::SETONE:
- // SETONE = SETOLT | SETOGT
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 :
- (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
- LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 :
- (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
- break;
+ // SETONE = O && UNE
+ ShouldInvertCC = true;
+ LLVM_FALLTHROUGH;
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode));
auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
+ CCCode = getCmpLibcallCC(LC2);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, RetVT);
NewLHS = DAG.getNode(
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
- Call2.first, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+ Call2.first, NewRHS, DAG.getCondCode(CCCode));
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
Call2.second);
- NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
+ Tmp.getValueType(), Tmp, NewLHS);
NewRHS = SDValue();
}
}
}
define i1 @cmp_f_one(float %a, float %b) {
; CHECK-LABEL: cmp_f_one:
-; NONE: bl __aeabi_fcmpgt
-; NONE: bl __aeabi_fcmplt
+; NONE: bl __aeabi_fcmpeq
+; NONE: bl __aeabi_fcmpun
; HARD: vcmp.f32
; HARD: movmi r0, #1
; HARD: movgt r0, #1
}
define i1 @cmp_d_one(double %a, double %b) {
; CHECK-LABEL: cmp_d_one:
-; NONE: bl __aeabi_dcmpgt
-; NONE: bl __aeabi_dcmplt
-; SP: bl __aeabi_dcmpgt
-; SP: bl __aeabi_dcmplt
+; NONE: bl __aeabi_dcmpeq
+; NONE: bl __aeabi_dcmpun
+; SP: bl __aeabi_dcmpeq
+; SP: bl __aeabi_dcmpun
; DP: vcmp.f64
; DP: movmi r0, #1
; DP: movgt r0, #1
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: callq __gttf2
+; CHECK-NEXT: callq __eqtf2
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: setg %bl
+; CHECK-NEXT: setne %bl
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT: callq __lttf2
+; CHECK-NEXT: callq __unordtf2
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: sets %al
-; CHECK-NEXT: orb %bl, %al
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: andb %bl, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movq %rdi, %rbx
-; CHECK-NEXT: callq __gttf2
+; CHECK-NEXT: callq __eqtf2
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: setg %bpl
+; CHECK-NEXT: setne %bpl
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT: callq __lttf2
+; CHECK-NEXT: callq __unordtf2
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: sets %al
-; CHECK-NEXT: orb %bpl, %al
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: testb %bpl, %al
; CHECK-NEXT: cmoveq %r14, %rbx
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: addq $32, %rsp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll __gttf2
+; X86-NEXT: calll __eqtf2
; X86-NEXT: addl $32, %esp
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setg %bl
+; X86-NEXT: setne %bl
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: calll __lttf2
+; X86-NEXT: calll __unordtf2
; X86-NEXT: addl $32, %esp
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: sets %al
-; X86-NEXT: orb %bl, %al
+; X86-NEXT: sete %al
+; X86-NEXT: testb %bl, %al
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovnel %eax, %ecx
; CHECK: sete
; CHECK: calll __unorddf2
; CHECK: setne
+; CHECK: or
; CHECK: retl
define i1 @test11(double %d) #0 {
ret i1 %cmp
}
; CHECK-LABEL: test11:
-; CHECK: calll __gtdf2
-; CHECK: setg
-; CHECK: calll __ltdf2
-; CHECK: sets
+; CHECK: calll __eqdf2
+; CHECK: setne
+; CHECK: calll __unorddf2
+; CHECK: sete
+; CHECK: and
; CHECK: retl
define i1 @test12(double %d) #0 {