"(State.getMachineFunction().getSubtarget()).", F),
A>;
+/// CCIfNotSubtarget - Match if the current subtarget doesn't have feature F.
+/// F is appended to a negated C++ expression that is evaluated on the
+/// X86Subtarget obtained from State's MachineFunction; action A is the
+/// CCAction forwarded to CCIf.
+class CCIfNotSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("!static_cast<const X86Subtarget&>"
+ "(State.getMachineFunction().getSubtarget()).", F),
+ A>;
+
// Register classes for RegCall
class RC_X86_RegCall {
list<Register> GPR_8 = [];
// MM0, it doesn't support these vector types.
CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
- // Long double types are always returned in FP0 (even with SSE).
- CCIfType<[f80], CCAssignToReg<[FP0, FP1]>>
+ // Long double types are always returned in FP0 (even with SSE),
+ // except on Win64.
+ CCIfNotSubtarget<"isTargetWin64()", CCIfType<[f80], CCAssignToReg<[FP0, FP1]>>>
]>;
// X86-32 C return-value convention.
// 512 bit vectors are passed by pointer
CCIfType<[v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
+ // Long doubles are passed by pointer
+ CCIfType<[f80], CCPassIndirect<i64>>,
+
// The first 4 MMX vector arguments are passed in GPRs.
CCIfType<[x86mmx], CCBitConvertToType<i64>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Long doubles get stack slots whose size and alignment depends on the
- // subtarget.
- CCIfType<[f80], CCAssignToStack<0, 0>>
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;
def CC_X86_Win64_VectorCall : CallingConv<[
;
; WIN-LABEL: exp_f80:
; WIN: # %bb.0:
-; WIN-NEXT: subq $56, %rsp
-; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: pushq %rsi
+; WIN-NEXT: subq $64, %rsp
+; WIN-NEXT: movq %rcx, %rsi
+; WIN-NEXT: fldt (%rdx)
; WIN-NEXT: fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN-NEXT: callq expl
-; WIN-NEXT: addq $56, %rsp
+; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: fstpt (%rsi)
+; WIN-NEXT: movq %rsi, %rax
+; WIN-NEXT: addq $64, %rsp
+; WIN-NEXT: popq %rsi
; WIN-NEXT: retq
;
; MAC-LABEL: exp_f80:
;
; WIN-LABEL: exp2_f80:
; WIN: # %bb.0:
-; WIN-NEXT: subq $56, %rsp
-; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: pushq %rsi
+; WIN-NEXT: subq $64, %rsp
+; WIN-NEXT: movq %rcx, %rsi
+; WIN-NEXT: fldt (%rdx)
; WIN-NEXT: fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN-NEXT: callq exp2l
-; WIN-NEXT: addq $56, %rsp
+; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: fstpt (%rsi)
+; WIN-NEXT: movq %rsi, %rax
+; WIN-NEXT: addq $64, %rsp
+; WIN-NEXT: popq %rsi
; WIN-NEXT: retq
;
; MAC-LABEL: exp2_f80:
;
; WIN-LABEL: log_f80:
; WIN: # %bb.0:
-; WIN-NEXT: subq $56, %rsp
-; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: pushq %rsi
+; WIN-NEXT: subq $64, %rsp
+; WIN-NEXT: movq %rcx, %rsi
+; WIN-NEXT: fldt (%rdx)
; WIN-NEXT: fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN-NEXT: callq logl
-; WIN-NEXT: addq $56, %rsp
+; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: fstpt (%rsi)
+; WIN-NEXT: movq %rsi, %rax
+; WIN-NEXT: addq $64, %rsp
+; WIN-NEXT: popq %rsi
; WIN-NEXT: retq
;
; MAC-LABEL: log_f80:
;
; WIN-LABEL: log2_f80:
; WIN: # %bb.0:
-; WIN-NEXT: subq $56, %rsp
-; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: pushq %rsi
+; WIN-NEXT: subq $64, %rsp
+; WIN-NEXT: movq %rcx, %rsi
+; WIN-NEXT: fldt (%rdx)
; WIN-NEXT: fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN-NEXT: callq log2l
-; WIN-NEXT: addq $56, %rsp
+; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: fstpt (%rsi)
+; WIN-NEXT: movq %rsi, %rax
+; WIN-NEXT: addq $64, %rsp
+; WIN-NEXT: popq %rsi
; WIN-NEXT: retq
;
; MAC-LABEL: log2_f80:
;
; WIN-LABEL: log10_f80:
; WIN: # %bb.0:
-; WIN-NEXT: subq $56, %rsp
-; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: pushq %rsi
+; WIN-NEXT: subq $64, %rsp
+; WIN-NEXT: movq %rcx, %rsi
+; WIN-NEXT: fldt (%rdx)
; WIN-NEXT: fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN-NEXT: callq log10l
-; WIN-NEXT: addq $56, %rsp
+; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: fstpt (%rsi)
+; WIN-NEXT: movq %rsi, %rax
+; WIN-NEXT: addq $64, %rsp
+; WIN-NEXT: popq %rsi
; WIN-NEXT: retq
;
; MAC-LABEL: log10_f80:
;
; WIN-LABEL: pow_f80:
; WIN: # %bb.0:
-; WIN-NEXT: subq $72, %rsp
-; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: pushq %rsi
+; WIN-NEXT: subq $80, %rsp
+; WIN-NEXT: movq %rcx, %rsi
+; WIN-NEXT: fldt (%rdx)
; WIN-NEXT: fld %st(0)
; WIN-NEXT: fstpt {{[0-9]+}}(%rsp)
; WIN-NEXT: fstpt {{[0-9]+}}(%rsp)
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; WIN-NEXT: callq powl
-; WIN-NEXT: addq $72, %rsp
+; WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; WIN-NEXT: fstpt (%rsi)
+; WIN-NEXT: movq %rsi, %rax
+; WIN-NEXT: addq $80, %rsp
+; WIN-NEXT: popq %rsi
; WIN-NEXT: retq
;
; MAC-LABEL: pow_f80:
; AVX512_64_WIN-LABEL: x_to_u64:
; AVX512_64_WIN: # %bb.0:
; AVX512_64_WIN-NEXT: pushq %rax
-; AVX512_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512_64_WIN-NEXT: fldt (%rcx)
; AVX512_64_WIN-NEXT: flds __real@{{.*}}(%rip)
; AVX512_64_WIN-NEXT: fld %st(1)
; AVX512_64_WIN-NEXT: fsub %st(1)
;
; SSE3_64_WIN-LABEL: x_to_u64:
; SSE3_64_WIN: # %bb.0:
-; SSE3_64_WIN-NEXT: subq $24, %rsp
-; SSE3_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE3_64_WIN-NEXT: subq $16, %rsp
+; SSE3_64_WIN-NEXT: fldt (%rcx)
; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip)
; SSE3_64_WIN-NEXT: fld %st(1)
; SSE3_64_WIN-NEXT: fsub %st(1)
; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp)
; SSE3_64_WIN-NEXT: fld %st(1)
-; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp)
+; SSE3_64_WIN-NEXT: fisttpll (%rsp)
; SSE3_64_WIN-NEXT: fucompi %st(1)
; SSE3_64_WIN-NEXT: fstp %st(0)
; SSE3_64_WIN-NEXT: jbe .LBB4_1
; SSE3_64_WIN-NEXT: # %bb.2:
-; SSE3_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; SSE3_64_WIN-NEXT: addq $24, %rsp
+; SSE3_64_WIN-NEXT: movq (%rsp), %rax
+; SSE3_64_WIN-NEXT: addq $16, %rsp
; SSE3_64_WIN-NEXT: retq
; SSE3_64_WIN-NEXT: .LBB4_1:
; SSE3_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE3_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax
-; SSE3_64_WIN-NEXT: addq $24, %rsp
+; SSE3_64_WIN-NEXT: addq $16, %rsp
; SSE3_64_WIN-NEXT: retq
;
; SSE3_64_LIN-LABEL: x_to_u64:
; SSE2_64_WIN-LABEL: x_to_u64:
; SSE2_64_WIN: # %bb.0:
; SSE2_64_WIN-NEXT: subq $24, %rsp
-; SSE2_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2_64_WIN-NEXT: fldt (%rcx)
; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip)
; SSE2_64_WIN-NEXT: fld %st(1)
; SSE2_64_WIN-NEXT: fsub %st(1)
; AVX512_64_WIN-LABEL: x_to_s64:
; AVX512_64_WIN: # %bb.0:
; AVX512_64_WIN-NEXT: pushq %rax
-; AVX512_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512_64_WIN-NEXT: fldt (%rcx)
; AVX512_64_WIN-NEXT: fisttpll (%rsp)
; AVX512_64_WIN-NEXT: movq (%rsp), %rax
; AVX512_64_WIN-NEXT: popq %rcx
; SSE3_64_WIN-LABEL: x_to_s64:
; SSE3_64_WIN: # %bb.0:
; SSE3_64_WIN-NEXT: pushq %rax
-; SSE3_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE3_64_WIN-NEXT: fldt (%rcx)
; SSE3_64_WIN-NEXT: fisttpll (%rsp)
; SSE3_64_WIN-NEXT: movq (%rsp), %rax
; SSE3_64_WIN-NEXT: popq %rcx
;
; SSE2_64_WIN-LABEL: x_to_s64:
; SSE2_64_WIN: # %bb.0:
-; SSE2_64_WIN-NEXT: subq $24, %rsp
-; SSE2_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2_64_WIN-NEXT: subq $16, %rsp
+; SSE2_64_WIN-NEXT: fldt (%rcx)
; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; SSE2_64_WIN-NEXT: addq $24, %rsp
+; SSE2_64_WIN-NEXT: addq $16, %rsp
; SSE2_64_WIN-NEXT: retq
;
; SSE2_64_LIN-LABEL: x_to_s64:
--- /dev/null
+; RUN: llc -mtriple x86_64-w64-mingw32 %s -o - | FileCheck %s
+
+@glob = common dso_local local_unnamed_addr global x86_fp80 0xK00000000000000000000, align 16
+
+; Loads the x86_fp80 value stored in @glob, passes it to floorl, and stores
+; the x86_fp80 result back into @glob.
+define dso_local void @call() {
+entry:
+ %0 = load x86_fp80, x86_fp80* @glob, align 16
+ %1 = tail call x86_fp80 @floorl(x86_fp80 %0)
+ store x86_fp80 %1, x86_fp80* @glob, align 16
+ ret void
+}
+
+declare x86_fp80 @floorl(x86_fp80)
+
+; CHECK-LABEL: call
+; CHECK: fldt glob(%rip)
+; CHECK: fstpt [[ARGOFF:[0-9]+]](%rsp)
+; CHECK: leaq [[RETOFF:[0-9]+]](%rsp), %rcx
+; CHECK: leaq [[ARGOFF]](%rsp), %rdx
+; CHECK: callq floorl
+; CHECK: fldt [[RETOFF]](%rsp)
+; CHECK: fstpt glob(%rip)