From 5ae677e102e35e4cabe393ae6185e25e49c91160 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Wed, 1 Nov 2017 16:23:06 +0000
Subject: [PATCH] [X86] Add 64-bit int to float/double conversion with AVX  to
 X86FastISel::X86SelectSIToFP

Summary:
[X86] Teach fast isel to handle i64 sitofp with AVX.

For some reason we only handled i32 sitofp with AVX. But with SSE only we support i64 so we should do the same with AVX.

Also add i686 command lines for the 32-bit tests. 64-bit tests are in a separate file to avoid a fast-isel abort failure in 32-bit mode.

Reviewers: RKSimon, zvi

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D39450

llvm-svn: 317102
---
 llvm/lib/Target/X86/X86FastISel.cpp                |   7 +-
 .../X86/fast-isel-int-float-conversion-x86-64.ll   |  66 ++++++++++++
 .../CodeGen/X86/fast-isel-int-float-conversion.ll  | 116 ++++++++++++++++++++-
 3 files changed, 184 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll

diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index c248fb3..9ea7590c 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2410,7 +2410,8 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
   if (!Subtarget->hasAVX())
     return false;
 
-  if (!I->getOperand(0)->getType()->isIntegerTy(32))
+  Type *InTy = I->getOperand(0)->getType();
+  if (!InTy->isIntegerTy(32) && !InTy->isIntegerTy(64))
     return false;
 
   // Select integer to float/double conversion.
@@ -2423,11 +2424,11 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
 
   if (I->getType()->isDoubleTy()) {
     // sitofp int -> double
-    Opcode = X86::VCVTSI2SDrr;
+    Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SD64rr : X86::VCVTSI2SDrr;
     RC = &X86::FR64RegClass;
   } else if (I->getType()->isFloatTy()) {
     // sitofp int -> float
-    Opcode = X86::VCVTSI2SSrr;
+    Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SS64rr : X86::VCVTSI2SSrr;
     RC = &X86::FR32RegClass;
   } else
     return false;
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
new file mode 100644
index 0000000..432e190
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+
+
+define double @long_to_double_rr(i64 %a) {
+; SSE2-LABEL: long_to_double_rr:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2sdq %rdi, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: long_to_double_rr:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = sitofp i64 %a to double
+  ret double %0
+}
+
+define double @long_to_double_rm(i64* %a) {
+; SSE2-LABEL: long_to_double_rm:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: long_to_double_rm:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = sitofp i64 %0 to double
+  ret double %1
+}
+
+define float @long_to_float_rr(i64 %a) {
+; SSE2-LABEL: long_to_float_rr:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2ssq %rdi, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: long_to_float_rr:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = sitofp i64 %a to float
+  ret float %0
+}
+
+define float @long_to_float_rm(i64* %a) {
+; SSE2-LABEL: long_to_float_rm:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: long_to_float_rm:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = sitofp i64 %0 to float
+  ret float %1
+}
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
index bc8f14f..2286fb9 100644
--- a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX
+; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2_X86
+; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX_X86
 
 
 define double @int_to_double_rr(i32 %a) {
@@ -13,6 +15,39 @@ define double @int_to_double_rr(i32 %a) {
 ; AVX:       # BB#0: # %entry
 ; AVX-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; SSE2_X86-LABEL: int_to_double_rr:
+; SSE2_X86:       # BB#0: # %entry
+; SSE2_X86-NEXT:    pushl %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT:    .cfi_offset %ebp, -8
+; SSE2_X86-NEXT:    movl %esp, %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa_register %ebp
+; SSE2_X86-NEXT:    andl $-8, %esp
+; SSE2_X86-NEXT:    subl $8, %esp
+; SSE2_X86-NEXT:    movl 8(%ebp), %eax
+; SSE2_X86-NEXT:    cvtsi2sdl %eax, %xmm0
+; SSE2_X86-NEXT:    movsd %xmm0, (%esp)
+; SSE2_X86-NEXT:    fldl (%esp)
+; SSE2_X86-NEXT:    movl %ebp, %esp
+; SSE2_X86-NEXT:    popl %ebp
+; SSE2_X86-NEXT:    retl
+;
+; AVX_X86-LABEL: int_to_double_rr:
+; AVX_X86:       # BB#0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    retl
 entry:
   %0 = sitofp i32 %a to double
   ret double %0
@@ -28,6 +63,40 @@ define double @int_to_double_rm(i32* %a) {
 ; AVX:       # BB#0: # %entry
 ; AVX-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; SSE2_X86-LABEL: int_to_double_rm:
+; SSE2_X86:       # BB#0: # %entry
+; SSE2_X86-NEXT:    pushl %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT:    .cfi_offset %ebp, -8
+; SSE2_X86-NEXT:    movl %esp, %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa_register %ebp
+; SSE2_X86-NEXT:    andl $-8, %esp
+; SSE2_X86-NEXT:    subl $8, %esp
+; SSE2_X86-NEXT:    movl 8(%ebp), %eax
+; SSE2_X86-NEXT:    cvtsi2sdl (%eax), %xmm0
+; SSE2_X86-NEXT:    movsd %xmm0, (%esp)
+; SSE2_X86-NEXT:    fldl (%esp)
+; SSE2_X86-NEXT:    movl %ebp, %esp
+; SSE2_X86-NEXT:    popl %ebp
+; SSE2_X86-NEXT:    retl
+;
+; AVX_X86-LABEL: int_to_double_rm:
+; AVX_X86:       # BB#0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtsi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    retl
 entry:
   %0 = load i32, i32* %a
   %1 = sitofp i32 %0 to double
@@ -44,6 +113,27 @@ define float @int_to_float_rr(i32 %a) {
 ; AVX:       # BB#0: # %entry
 ; AVX-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; SSE2_X86-LABEL: int_to_float_rr:
+; SSE2_X86:       # BB#0: # %entry
+; SSE2_X86-NEXT:    pushl %eax
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2_X86-NEXT:    cvtsi2ssl %eax, %xmm0
+; SSE2_X86-NEXT:    movss %xmm0, (%esp)
+; SSE2_X86-NEXT:    flds (%esp)
+; SSE2_X86-NEXT:    popl %eax
+; SSE2_X86-NEXT:    retl
+;
+; AVX_X86-LABEL: int_to_float_rr:
+; AVX_X86:       # BB#0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    retl
 entry:
   %0 = sitofp i32 %a to float
   ret float %0
@@ -59,6 +149,28 @@ define float @int_to_float_rm(i32* %a) {
 ; AVX:       # BB#0: # %entry
 ; AVX-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; SSE2_X86-LABEL: int_to_float_rm:
+; SSE2_X86:       # BB#0: # %entry
+; SSE2_X86-NEXT:    pushl %eax
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2_X86-NEXT:    cvtsi2ssl (%eax), %xmm0
+; SSE2_X86-NEXT:    movss %xmm0, (%esp)
+; SSE2_X86-NEXT:    flds (%esp)
+; SSE2_X86-NEXT:    popl %eax
+; SSE2_X86-NEXT:    retl
+;
+; AVX_X86-LABEL: int_to_float_rm:
+; AVX_X86:       # BB#0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtsi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    retl
 entry:
   %0 = load i32, i32* %a
   %1 = sitofp i32 %0 to float
-- 
2.7.4