From 8eade092497e17ad1cfbb1a7c3d9156686b229cc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 19 Feb 2019 22:37:00 +0000 Subject: [PATCH] [X86] Mark FP32_TO_INT16_IN_MEM/FP32_TO_INT32_IN_MEM/FP32_TO_INT64_IN_MEM as clobbering EFLAGS to prevent mis-scheduling during conversion from SelectionDAG to MIR. After r354178, these instructions expand to a sequence that uses an OR instruction. That OR clobbers EFLAGS so we need to state that to avoid accidentally using the clobbered flags. Our tests show the bug, but I didn't notice because the SETcc instructions didn't move after r354178 since it used to be safe to do the fp->int conversion first. We should probably convert this whole sequence to SelectionDAG instead of a custom inserter to avoid mistakes like this. Fixes PR40779 llvm-svn: 354395 --- llvm/lib/Target/X86/X86InstrFPStack.td | 4 +- llvm/test/CodeGen/X86/fp-cvt.ll | 28 ++++++------ llvm/test/CodeGen/X86/scalar-fp-to-i64.ll | 76 +++++++++++++++---------------- 3 files changed, 55 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td index 753e33b..f46aae1 100644 --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -113,7 +113,9 @@ def fpimmneg1 : FPImmLeaf; // Some 'special' instructions - expanded after instruction selection. -let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { +// Clobbers EFLAGS due to OR instruction used internally. +// FIXME: Can we model this in SelectionDAG? 
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [EFLAGS] in { def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src), [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>; def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src), diff --git a/llvm/test/CodeGen/X86/fp-cvt.ll b/llvm/test/CodeGen/X86/fp-cvt.ll index be09718..8164651 100644 --- a/llvm/test/CodeGen/X86/fp-cvt.ll +++ b/llvm/test/CodeGen/X86/fp-cvt.ll @@ -457,15 +457,15 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind { ; X86-NEXT: fldz ; X86-NEXT: .LBB10_2: ; X86-NEXT: fstp %st(0) +; X86-NEXT: setbe %al ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: orl $3072, %eax # imm = 0xC00 -; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-NEXT: setbe %dl +; X86-NEXT: movzbl %al, %edx ; X86-NEXT: shll $31, %edx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -479,19 +479,19 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind { ; X64-X87-NEXT: flds {{.*}}(%rip) ; X64-X87-NEXT: fld %st(1) ; X64-X87-NEXT: fsub %st(1), %st +; X64-X87-NEXT: xorl %eax, %eax ; X64-X87-NEXT: fxch %st(1) ; X64-X87-NEXT: fucompi %st(2), %st ; X64-X87-NEXT: fcmovnbe %st(1), %st ; X64-X87-NEXT: fstp %st(1) +; X64-X87-NEXT: setbe %al ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx -; X64-X87-NEXT: xorl %eax, %eax ; X64-X87-NEXT: orl $3072, %ecx # imm = 0xC00 ; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: setbe %al ; X64-X87-NEXT: shlq $63, %rax ; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; 
X64-X87-NEXT: retq @@ -539,15 +539,15 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind { ; X86-NEXT: fldz ; X86-NEXT: .LBB11_2: ; X86-NEXT: fstp %st(0) +; X86-NEXT: setbe %al ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: orl $3072, %eax # imm = 0xC00 -; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-NEXT: setbe %dl +; X86-NEXT: movzbl %al, %edx ; X86-NEXT: shll $31, %edx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -561,19 +561,19 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind { ; X64-X87-NEXT: flds {{.*}}(%rip) ; X64-X87-NEXT: fld %st(1) ; X64-X87-NEXT: fsub %st(1), %st +; X64-X87-NEXT: xorl %eax, %eax ; X64-X87-NEXT: fxch %st(1) ; X64-X87-NEXT: fucompi %st(2), %st ; X64-X87-NEXT: fcmovnbe %st(1), %st ; X64-X87-NEXT: fstp %st(1) +; X64-X87-NEXT: setbe %al ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx -; X64-X87-NEXT: xorl %eax, %eax ; X64-X87-NEXT: orl $3072, %ecx # imm = 0xC00 ; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: setbe %al ; X64-X87-NEXT: shlq $63, %rax ; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; X64-X87-NEXT: retq diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll index ec9156b..7576aa3 100644 --- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -289,15 +289,15 @@ define i64 @f_to_u64(float %a) nounwind { ; X87_WIN-NEXT: fldz ; X87_WIN-NEXT: LBB0_2: ; X87_WIN-NEXT: fstp %st(0) +; X87_WIN-NEXT: setbe %al ; X87_WIN-NEXT: fnstcw 
{{[0-9]+}}(%esp) -; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X87_WIN-NEXT: xorl %edx, %edx -; X87_WIN-NEXT: orl $3072, %eax # imm = 0xC00 -; X87_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X87_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 +; X87_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_WIN-NEXT: setbe %dl +; X87_WIN-NEXT: movzbl %al, %edx ; X87_WIN-NEXT: shll $31, %edx ; X87_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -323,15 +323,15 @@ define i64 @f_to_u64(float %a) nounwind { ; X87_LIN-NEXT: fldz ; X87_LIN-NEXT: .LBB0_2: ; X87_LIN-NEXT: fstp %st(0) +; X87_LIN-NEXT: setbe %al ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) -; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X87_LIN-NEXT: xorl %edx, %edx -; X87_LIN-NEXT: orl $3072, %eax # imm = 0xC00 -; X87_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X87_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 +; X87_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_LIN-NEXT: setbe %dl +; X87_LIN-NEXT: movzbl %al, %edx ; X87_LIN-NEXT: shll $31, %edx ; X87_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -779,15 +779,15 @@ define i64 @d_to_u64(double %a) nounwind { ; X87_WIN-NEXT: fldz ; X87_WIN-NEXT: LBB2_2: ; X87_WIN-NEXT: fstp %st(0) +; X87_WIN-NEXT: setbe %al ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) -; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X87_WIN-NEXT: xorl %edx, %edx -; X87_WIN-NEXT: orl $3072, %eax # imm = 0xC00 -; X87_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X87_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 +; X87_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; 
X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_WIN-NEXT: setbe %dl +; X87_WIN-NEXT: movzbl %al, %edx ; X87_WIN-NEXT: shll $31, %edx ; X87_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -813,15 +813,15 @@ define i64 @d_to_u64(double %a) nounwind { ; X87_LIN-NEXT: fldz ; X87_LIN-NEXT: .LBB2_2: ; X87_LIN-NEXT: fstp %st(0) +; X87_LIN-NEXT: setbe %al ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) -; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X87_LIN-NEXT: xorl %edx, %edx -; X87_LIN-NEXT: orl $3072, %eax # imm = 0xC00 -; X87_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X87_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 +; X87_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_LIN-NEXT: setbe %dl +; X87_LIN-NEXT: movzbl %al, %edx ; X87_LIN-NEXT: shll $31, %edx ; X87_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1189,19 +1189,19 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_32_WIN-NEXT: flds __real@5f000000 ; SSE2_32_WIN-NEXT: fld %st(1) ; SSE2_32_WIN-NEXT: fsub %st(1), %st +; SSE2_32_WIN-NEXT: xorl %edx, %edx ; SSE2_32_WIN-NEXT: fxch %st(1) ; SSE2_32_WIN-NEXT: fucompi %st(2), %st ; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st ; SSE2_32_WIN-NEXT: fstp %st(1) +; SSE2_32_WIN-NEXT: setbe %dl ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; SSE2_32_WIN-NEXT: xorl %edx, %edx ; SSE2_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00 ; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE2_32_WIN-NEXT: setbe %dl ; SSE2_32_WIN-NEXT: shll $31, %edx ; SSE2_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1216,19 
+1216,19 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_32_LIN-NEXT: flds {{\.LCPI.*}} ; SSE2_32_LIN-NEXT: fld %st(1) ; SSE2_32_LIN-NEXT: fsub %st(1), %st +; SSE2_32_LIN-NEXT: xorl %edx, %edx ; SSE2_32_LIN-NEXT: fxch %st(1) ; SSE2_32_LIN-NEXT: fucompi %st(2), %st ; SSE2_32_LIN-NEXT: fcmovnbe %st(1), %st ; SSE2_32_LIN-NEXT: fstp %st(1) +; SSE2_32_LIN-NEXT: setbe %dl ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; SSE2_32_LIN-NEXT: xorl %edx, %edx ; SSE2_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00 ; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE2_32_LIN-NEXT: setbe %dl ; SSE2_32_LIN-NEXT: shll $31, %edx ; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1242,19 +1242,19 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; SSE2_64_WIN-NEXT: fld %st(1) ; SSE2_64_WIN-NEXT: fsub %st(1), %st +; SSE2_64_WIN-NEXT: xorl %eax, %eax ; SSE2_64_WIN-NEXT: fxch %st(1) ; SSE2_64_WIN-NEXT: fucompi %st(2), %st ; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st ; SSE2_64_WIN-NEXT: fstp %st(1) +; SSE2_64_WIN-NEXT: setbe %al ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx -; SSE2_64_WIN-NEXT: xorl %eax, %eax ; SSE2_64_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 ; SSE2_64_WIN-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: setbe %al ; SSE2_64_WIN-NEXT: shlq $63, %rax ; SSE2_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax ; SSE2_64_WIN-NEXT: addq $16, %rsp @@ -1266,19 +1266,19 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_64_LIN-NEXT: flds {{.*}}(%rip) ; SSE2_64_LIN-NEXT: fld %st(1) ; SSE2_64_LIN-NEXT: fsub %st(1), %st +; SSE2_64_LIN-NEXT: 
xorl %eax, %eax ; SSE2_64_LIN-NEXT: fxch %st(1) ; SSE2_64_LIN-NEXT: fucompi %st(2), %st ; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st ; SSE2_64_LIN-NEXT: fstp %st(1) +; SSE2_64_LIN-NEXT: setbe %al ; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx -; SSE2_64_LIN-NEXT: xorl %eax, %eax ; SSE2_64_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 ; SSE2_64_LIN-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: setbe %al ; SSE2_64_LIN-NEXT: shlq $63, %rax ; SSE2_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; SSE2_64_LIN-NEXT: retq @@ -1304,15 +1304,15 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; X87_WIN-NEXT: fldz ; X87_WIN-NEXT: LBB4_2: ; X87_WIN-NEXT: fstp %st(0) +; X87_WIN-NEXT: setbe %al ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) -; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X87_WIN-NEXT: xorl %edx, %edx -; X87_WIN-NEXT: orl $3072, %eax # imm = 0xC00 -; X87_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X87_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 +; X87_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_WIN-NEXT: setbe %dl +; X87_WIN-NEXT: movzbl %al, %edx ; X87_WIN-NEXT: shll $31, %edx ; X87_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1338,15 +1338,15 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; X87_LIN-NEXT: fldz ; X87_LIN-NEXT: .LBB4_2: ; X87_LIN-NEXT: fstp %st(0) +; X87_LIN-NEXT: setbe %al ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) -; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X87_LIN-NEXT: xorl %edx, %edx -; X87_LIN-NEXT: orl $3072, %eax # imm = 0xC00 -; X87_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X87_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 +; 
X87_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_LIN-NEXT: setbe %dl +; X87_LIN-NEXT: movzbl %al, %edx ; X87_LIN-NEXT: shll $31, %edx ; X87_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax -- 2.7.4