From a6e3027db7ebe6863e44bafcfeaacc16bdc88a3f Mon Sep 17 00:00:00 2001 From: icedrocket <114203630+icedrocket@users.noreply.github.com> Date: Wed, 18 Jan 2023 22:35:13 -0800 Subject: [PATCH] [X86] Avoid converting u64 to f32 using x87 on Windows The code below currently prints less accurate values only on Windows 32-bit. On Windows, the default precision control on x87 is only 53-bit, and FADD triggers rounding with that precision, so the final result may be less accurate. This revision avoids less accurate conversions by using library calls instead. ``` int main() { int64_t n = 0b0000000000111111111111111111111111011111111111111111111111111111; printf("%lld, %.0f, %.0f", n, (float)n, (float)(uint64_t)n); return 0; } ``` Reviewed By: craig.topper, lebedev.ri Differential Revision: https://reviews.llvm.org/D141074 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++++++ llvm/test/CodeGen/X86/uint64-to-float.ll | 36 ++++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c88c66d..fb68a9c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21898,6 +21898,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, } assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); + + // On Windows, the default precision control on x87 is only 53-bit, and FADD + // triggers rounding with that precision, so the final result may be less + // accurate. 18014397972611071 is one such case. + if (Subtarget.isOSWindows()) + return SDValue(); + SDValue ValueToStore = Src; if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) { // Bitcasting to f64 here allows us to do a single 64-bit store from diff --git a/llvm/test/CodeGen/X86/uint64-to-float.ll b/llvm/test/CodeGen/X86/uint64-to-float.ll index 8b66234..f44986b 100644 --- a/llvm/test/CodeGen/X86/uint64-to-float.ll +++ b/llvm/test/CodeGen/X86/uint64-to-float.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-apple-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-apple-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686-windows -mattr=+sse2 | FileCheck %s --check-prefix=X86-WIN +; RUN: llc < %s -mtriple=x86_64-windows -mattr=+sse2 | FileCheck %s --check-prefix=X64-WIN ; Verify that we are using the efficient uitofp --> sitofp lowering illustrated ; by the compiler_rt implementation of __floatundisf. @@ -42,6 +44,36 @@ define float @test(i64 %a) nounwind { ; X64-NEXT: cvtsi2ss %rdi, %xmm0 ; X64-NEXT: addss %xmm0, %xmm0 ; X64-NEXT: retq +; +; X86-WIN-LABEL: test: +; X86-WIN: # %bb.0: # %entry +; X86-WIN-NEXT: pushl %ebp +; X86-WIN-NEXT: movl %esp, %ebp +; X86-WIN-NEXT: andl $-8, %esp +; X86-WIN-NEXT: subl $8, %esp +; X86-WIN-NEXT: pushl 12(%ebp) +; X86-WIN-NEXT: pushl 8(%ebp) +; X86-WIN-NEXT: calll ___floatundisf +; X86-WIN-NEXT: addl $8, %esp +; X86-WIN-NEXT: movl %ebp, %esp +; X86-WIN-NEXT: popl %ebp +; X86-WIN-NEXT: retl +; +; X64-WIN-LABEL: test: +; X64-WIN: # %bb.0: # %entry +; X64-WIN-NEXT: testq %rcx, %rcx +; X64-WIN-NEXT: js .LBB0_1 +; X64-WIN-NEXT: # %bb.2: # %entry +; X64-WIN-NEXT: cvtsi2ss %rcx, %xmm0 +; X64-WIN-NEXT: retq +; X64-WIN-NEXT: .LBB0_1: +; X64-WIN-NEXT: movq %rcx, %rax +; X64-WIN-NEXT: shrq %rax +; X64-WIN-NEXT: andl $1, %ecx +; X64-WIN-NEXT: orq %rax, %rcx +; X64-WIN-NEXT: cvtsi2ss %rcx, %xmm0 +; X64-WIN-NEXT: addss %xmm0, %xmm0 +; X64-WIN-NEXT: retq entry: %b = uitofp i64 %a to float ret float %b -- 2.7.4