// Since 'pentium4' is the default 32-bit CPU on Linux and Windows,
// give it more modern tunings.
// FIXME: This wouldn't be needed if we supported mtune.
- def : ProcessorModel<P, GenericPostRAModel,
+ def : ProcessorModel<P, SandyBridgeModel,
[FeatureX87, FeatureCMPXCHG8B,
FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
FeatureCMOV, FeatureInsertVZEROUPPER,
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_rip
; RUN: llc < %s -mtriple=i686-pc-windows-msvc | FileCheck %s -check-prefix=X32
; Control Flow Guard is currently only available on Windows
; Test that Control Flow Guard checks are correctly added for x86 vector calls.
define void @func_cf_vector_x86(void (%struct.HVA)* %0, %struct.HVA* %1) #0 {
+; X32-LABEL: func_cf_vector_x86:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-16, %esp
+; X32-NEXT: subl $48, %esp
+; X32-NEXT: movl 8(%ebp), %ecx
+; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movups (%eax), %xmm0
+; X32-NEXT: movups 16(%eax), %xmm1
+; X32-NEXT: movaps %xmm0, (%esp)
+; X32-NEXT: movaps %xmm1, 16(%esp)
+; X32-NEXT: movsd (%esp), %xmm4
+; X32-NEXT: movsd 8(%esp), %xmm5
+; X32-NEXT: movsd 16(%esp), %xmm6
+; X32-NEXT: movsd 24(%esp), %xmm7
+; X32-NEXT: calll *___guard_check_icall_fptr
+; X32-NEXT: movaps %xmm4, %xmm0
+; X32-NEXT: movaps %xmm5, %xmm1
+; X32-NEXT: movaps %xmm6, %xmm2
+; X32-NEXT: movaps %xmm7, %xmm3
+; X32-NEXT: calll *%ecx
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
entry:
%2 = alloca %struct.HVA, align 8
%3 = bitcast %struct.HVA* %2 to i8*
call x86_vectorcallcc void %0(%struct.HVA inreg %5)
ret void
- ; X32-LABEL: func_cf_vector_x86
- ; X32: movl 12(%ebp), %eax
- ; X32: movl 8(%ebp), %ecx
- ; X32: movups (%eax), %xmm0
- ; X32: movups 16(%eax), %xmm1
- ; X32: movaps %xmm0, (%esp)
- ; X32: movaps %xmm1, 16(%esp)
- ; X32: movsd (%esp), %xmm4
- ; X32: movsd 8(%esp), %xmm5
- ; X32: movsd 16(%esp), %xmm6
- ; X32: movsd 24(%esp), %xmm7
- ; X32: calll *___guard_check_icall_fptr
- ; X32: movaps %xmm4, %xmm0
- ; X32: movaps %xmm5, %xmm1
- ; X32: movaps %xmm6, %xmm2
- ; X32: movaps %xmm7, %xmm3
- ; X32: calll *%ecx
}
attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
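; For reference, the guard checks above are only inserted for modules that opt
; into Control Flow Guard. A minimal sketch of such a module (the function name
; here is hypothetical, and the flag value of 2 is assumed to request check
; instrumentation rather than table-only emission):

define void @indirect_call_sketch(void ()* %fp) {
entry:
  ; With a Windows triple such as i686-pc-windows-msvc, this indirect call is
  ; lowered to a check through ___guard_check_icall_fptr followed by the call,
  ; as in the X32 checks above.
  call void %fp()
  ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 2, !"cfguard", i32 2}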
define x86_fp80 @test17(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test17:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: flds {{\.LCPI.*}}
; SSE-NEXT: fxch %st(1)
-; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fcmovnbe %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test18(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test18:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: flds {{\.LCPI.*}}
; SSE-NEXT: fxch %st(1)
-; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fcmovnb %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test19(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test19:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: flds {{\.LCPI.*}}
; SSE-NEXT: fxch %st(1)
-; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fcmovb %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test20(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test20:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: flds {{\.LCPI.*}}
; SSE-NEXT: fxch %st(1)
-; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fcmovbe %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test21(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test21:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE-NEXT: flds {{\.LCPI.*}}
-; SSE-NEXT: fxch %st(1)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: setg %al
; SSE-NEXT: testb %al, %al
+; SSE-NEXT: flds {{\.LCPI.*}}
+; SSE-NEXT: fxch %st(1)
; SSE-NEXT: fcmovne %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test22(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test22:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE-NEXT: flds {{\.LCPI.*}}
-; SSE-NEXT: fxch %st(1)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: setge %al
; SSE-NEXT: testb %al, %al
+; SSE-NEXT: flds {{\.LCPI.*}}
+; SSE-NEXT: fxch %st(1)
; SSE-NEXT: fcmovne %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test23(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test23:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE-NEXT: flds {{\.LCPI.*}}
-; SSE-NEXT: fxch %st(1)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: setl %al
; SSE-NEXT: testb %al, %al
+; SSE-NEXT: flds {{\.LCPI.*}}
+; SSE-NEXT: fxch %st(1)
; SSE-NEXT: fcmovne %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test24(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test24:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE-NEXT: flds {{\.LCPI.*}}
-; SSE-NEXT: fxch %st(1)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: setle %al
; SSE-NEXT: testb %al, %al
+; SSE-NEXT: flds {{\.LCPI.*}}
+; SSE-NEXT: fxch %st(1)
; SSE-NEXT: fcmovne %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
-; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s
-; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s --check-prefix=PENTIUM4
+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s --check-prefix=PENTIUM4
; RUN: llc < %s -mtriple=i386 -mcpu=pentium-m | FileCheck %s
; RUN: llc < %s -mtriple=i386 -mcpu=prescott | FileCheck %s
; RUN: llc < %s -mtriple=i386 -mcpu=nocona | FileCheck %s
; happens during the post-RA-scheduler, which should be enabled by
; default with the above specified cpus.
+; Pentium4 is the default 32-bit CPU on Linux and currently has the postRA
+; scheduler disabled. Leaving the command lines in place in case we change that.
+
@ptrs = external global [0 x i32*], align 4
@idxa = common global i32 0, align 4
@idxb = common global i32 0, align 4
@res = common global i32 0, align 4
define void @addindirect() {
+; PENTIUM4-LABEL: addindirect:
+; PENTIUM4: # %bb.0: # %entry
+; PENTIUM4-NEXT: movl idxa, %eax
+; PENTIUM4-NEXT: movl ptrs(,%eax,4), %eax
+; PENTIUM4-NEXT: movl idxb, %ecx
+; PENTIUM4-NEXT: movl ptrs(,%ecx,4), %ecx
+; PENTIUM4-NEXT: movl (%ecx), %ecx
+; PENTIUM4-NEXT: addl (%eax), %ecx
+; PENTIUM4-NEXT: movl %ecx, res
+; PENTIUM4-NEXT: retl
+;
; CHECK-LABEL: addindirect:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl idxb, %ecx
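; Both blocks of checks above encode the same computation, res = *ptrs[idxa] +
; *ptrs[idxb]; they differ only in how the loads are interleaved, presumably
; because the post-RA scheduler runs for the CHECK cpus but stays disabled for
; pentium4, per the note above. A rough IR sketch of that computation follows
; (hypothetical value names, not the test's exact body):

define void @addindirect_sketch() {
entry:
  %ia = load i32, i32* @idxa, align 4
  %pa = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %ia
  %a = load i32*, i32** %pa, align 4
  %ib = load i32, i32* @idxb, align 4
  %pb = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %ib
  %b = load i32*, i32** %pb, align 4
  %va = load i32, i32* %a, align 4
  %vb = load i32, i32* %b, align 4
  %sum = add i32 %vb, %va
  store i32 %sum, i32* @res, align 4
  ret void
}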
%struct.Buffer = type { i8*, i32 }
; This test checks that the load %2, whose value is stored back after the 0xCD
; fill, is not dropped (a reduced sketch of the pattern follows the CHECK lines).
-;
+;
define i32 @pr34088() local_unnamed_addr {
; CHECK-LABEL: pr34088:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: subl $32, %esp
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205]
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movaps %xmm0, (%esp)
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205]
; CHECK-NEXT: movaps %xmm1, (%esp)
+; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: .cfi_def_cfa %esp, 4
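; The movsd load above reads the buffer before the 0xCD fill and its value is
; written back by the later movsd store, so it cannot be eliminated. A reduced,
; hypothetical sketch of that load/fill/store-back pattern (not the test's
; actual body):

define i64 @keep_load_sketch(i64* %p) {
entry:
  %saved = load i64, i64* %p, align 8    ; the load that must survive
  %raw = bitcast i64* %p to i8*
  call void @llvm.memset.p0i8.i32(i8* align 8 %raw, i8 -51, i32 8, i1 false)
  store i64 %saved, i64* %p, align 8     ; writes the pre-fill bytes back
  ret i64 %saved
}

declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)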
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: divss {{\.LCPI.*}}, %xmm0
; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss %xmm0, %xmm1
; CHECK-NEXT: setae %cl
; CHECK-NEXT: ucomiss {{\.LCPI.*}}, %xmm0
; CHECK: subl $20, %esp
; CHECK: .cv_fpo_stackalloc 20
; CHECK: .cv_fpo_endprologue
+; CHECK: movl 28(%esp), %esi
; CHECK: ___security_cookie
-; CHECK: movl 28(%esp), %esi
; CHECK: movl %esi, {{[0-9]*}}(%esp)
; CHECK: movl %esi, {{[0-9]*}}(%esp)
; CHECK: movl %esi, {{[0-9]*}}(%esp)
; CHECK: addl $20, %esp
; CHECK: popl %esi
; CHECK: retl
-; CHECK: Ltmp3:
+; CHECK: Ltmp2:
; CHECK: .cv_fpo_endproc
; ModuleID = 't.c'