From 3bc899b4de74d395b03d3969d700aac71f89bc12 Mon Sep 17 00:00:00 2001
From: Bob Haarman <llvm@inglorion.net>
Date: Wed, 9 Jun 2021 13:41:26 -0700
Subject: [PATCH] [X86] avoid assert with varargs, soft float, and
 no-implicit-float

Fixes:
 - PR36507 Floating point varargs are not handled correctly with
   -mno-implicit-float
 - PR48528 __builtin_va_start assumes it can pass SSE registers
   when using -Xclang -msoft-float -Xclang -no-implicit-float

On x86_64, floating-point parameters are normally passed in XMM
registers. For va_start, we spill those to memory so va_arg can
find them. There is an interaction here with -msoft-float and
-no-implicit-float:

When -msoft-float is in effect, floating-point parameters are
passed in general-purpose registers instead of in XMM
registers.

When -no-implicit-float is in effect, it "disables implicit
floating-point instructions" (per the LangRef). The intended
effect is to not have the compiler generate floating-point code
unless explicit floating-point operations are present in the
source code, but what exactly counts as an explicit floating-point
operation is not specified. The existing behavior of LLVM here has
led to some surprises and PRs.

This change modifies the behavior as follows:

  | soft | no-implicit | old behavior    | new behavior    |
  |  no  |   no        | spill XMM regs  | spill XMM regs  |
  | yes  |   no        | don't spill XMM | don't spill XMM |
  |  no  |  yes        | don't spill XMM | spill XMM regs  |
  | yes  |  yes        | assert          | don't spill XMM |

In particular, this avoids the assert that happens when
-msoft-float and -no-implicit-float are both in effect. This
seems like a perfectly reasonable combination: If we don't want
to rely on hardware floating-point support, we want to both
avoid using float registers to pass parameters and avoid having
the compiler generate floating-point code that wasn't in the
original program. Instead of crashing the compiler, the new
behavior is to not synthesize floating-point code in this
case. This fixes PR48528.

The other interesting case is when -no-implicit-float is in
effect, but -msoft-float is not. In that case, any floating-point
parameters that are present will be in XMM registers, so we have
to spill those registers for va_arg to find the parameters. This
fixes PR36507. The spill is conditional on %al indicating that
parameters are present in XMM registers, so no floating-point
code will be executed unless the function is called with
floating-point parameters.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D104001
---
 llvm/lib/Target/X86/X86ISelLowering.cpp    | 11 +---
 llvm/test/CodeGen/X86/varargs-softfloat.ll | 81 ++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/varargs-softfloat.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6b7aea4..2148edd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3377,12 +3377,8 @@ static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
     return None;
   }
 
-  const Function &F = MF.getFunction();
-  bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
   bool isSoftFloat = Subtarget.useSoftFloat();
-  assert(!(isSoftFloat && NoImplicitFloatOps) &&
-         "SSE register cannot be used when SSE is disabled!");
-  if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
+  if (isSoftFloat || !Subtarget.hasSSE1())
     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
     // registers.
     return None;
@@ -3454,11 +3450,6 @@ void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
         FrameInfo.CreateFixedObject(1, StackSize, true));
   }
 
-  // Figure out if XMM registers are in use.
-  assert(!(Subtarget.useSoftFloat() &&
-           TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) &&
-         "SSE register cannot be used when SSE is disabled!");
-
   // 64-bit calling conventions support varargs and register parameters, so we
   // have to do extra work to spill them in the prologue.
   if (is64Bit()) {
diff --git a/llvm/test/CodeGen/X86/varargs-softfloat.ll b/llvm/test/CodeGen/X86/varargs-softfloat.ll
new file mode 100644
index 0000000..f25a3f9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/varargs-softfloat.ll
@@ -0,0 +1,81 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+declare void @llvm.va_end(i8*) #0
+declare void @llvm.va_start(i8*) #0
+
+define void @hardf(i8* %fmt, ...) #1 {
+; CHECK-LABEL: hardf
+; When using XMM registers to pass floating-point parameters,
+; we need to spill those for va_start.
+; CHECK: testb %al, %al
+; CHECK: movaps  %xmm0, {{.*}}%rsp
+; CHECK: movaps  %xmm1, {{.*}}%rsp
+; CHECK: movaps  %xmm2, {{.*}}%rsp
+; CHECK: movaps  %xmm3, {{.*}}%rsp
+; CHECK: movaps  %xmm4, {{.*}}%rsp
+; CHECK: movaps  %xmm5, {{.*}}%rsp
+; CHECK: movaps  %xmm6, {{.*}}%rsp
+; CHECK: movaps  %xmm7, {{.*}}%rsp
+  %va = alloca [1 x %struct.__va_list_tag], align 16
+  %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0
+  %a = bitcast %struct.__va_list_tag* %arraydecay to i8*
+  call void @llvm.va_start(i8* %a)
+  call void @llvm.va_end(i8* nonnull %a)
+  ret void
+}
+
+define void @softf(i8* %fmt, ...) #2 {
+; CHECK-LABEL: softf
+; For software floating point, floats are passed in general
+; purpose registers, so no need to spill XMM registers.
+; CHECK-NOT: %xmm
+; CHECK: retq
+  %va = alloca [1 x %struct.__va_list_tag], align 16
+  %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0
+  %a = bitcast %struct.__va_list_tag* %arraydecay to i8*
+  call void @llvm.va_start(i8* %a)
+  call void @llvm.va_end(i8* nonnull %a)
+  ret void
+}
+
+define void @noimplf(i8* %fmt, ...) #3 {
+; CHECK-LABEL: noimplf
+; Even with noimplicitfloat, when using the hardware float API, we
+; need to emit code to spill the XMM registers (PR36507).
+; CHECK: testb %al, %al
+; CHECK: movaps  %xmm0, {{.*}}%rsp
+; CHECK: movaps  %xmm1, {{.*}}%rsp
+; CHECK: movaps  %xmm2, {{.*}}%rsp
+; CHECK: movaps  %xmm3, {{.*}}%rsp
+; CHECK: movaps  %xmm4, {{.*}}%rsp
+; CHECK: movaps  %xmm5, {{.*}}%rsp
+; CHECK: movaps  %xmm6, {{.*}}%rsp
+; CHECK: movaps  %xmm7, {{.*}}%rsp
+  %va = alloca [1 x %struct.__va_list_tag], align 16
+  %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0
+  %a = bitcast %struct.__va_list_tag* %arraydecay to i8*
+  call void @llvm.va_start(i8* %a)
+  call void @llvm.va_end(i8* nonnull %a)
+  ret void
+}
+
+define void @noimplsoftf(i8* %fmt, ...) #4 {
+; CHECK-LABEL: noimplsoftf
+; Combining noimplicitfloat and use-soft-float should not assert (PR48528).
+; CHECK-NOT: %xmm
+; CHECK: retq
+  %va = alloca [1 x %struct.__va_list_tag], align 16
+  %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0
+  %a = bitcast %struct.__va_list_tag* %arraydecay to i8*
+  call void @llvm.va_start(i8* %a)
+  call void @llvm.va_end(i8* nonnull %a)
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind uwtable }
+attributes #2 = { nounwind uwtable "use-soft-float"="true" }
+attributes #3 = { noimplicitfloat nounwind uwtable }
+attributes #4 = { noimplicitfloat nounwind uwtable "use-soft-float"="true" }
-- 
2.7.4