// Since 'pentium4' is the default 32-bit CPU on Linux and Windows,
// give it more modern tunings.
// FIXME: This wouldn't be needed if we supported mtune.
- def : ProcessorModel<P, GenericPostRAModel,
+ def : ProcessorModel<P, SandyBridgeModel,
[FeatureX87, FeatureCMPXCHG8B,
FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
FeatureCMOV, FeatureInsertVZEROUPPER,
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_rip
; RUN: llc < %s -mtriple=i686-pc-windows-msvc | FileCheck %s -check-prefix=X32
; Control Flow Guard is currently only available on Windows
; Test that Control Flow Guard checks are correctly added for x86 vector calls.
define void @func_cf_vector_x86(void (%struct.HVA)* %0, %struct.HVA* %1) #0 {
+; X32-LABEL: func_cf_vector_x86:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-16, %esp
+; X32-NEXT: subl $48, %esp
+; X32-NEXT: movl 8(%ebp), %ecx
+; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movups (%eax), %xmm0
+; X32-NEXT: movups 16(%eax), %xmm1
+; X32-NEXT: movaps %xmm0, (%esp)
+; X32-NEXT: movaps %xmm1, 16(%esp)
+; X32-NEXT: movsd (%esp), %xmm4
+; X32-NEXT: movsd 8(%esp), %xmm5
+; X32-NEXT: movsd 16(%esp), %xmm6
+; X32-NEXT: movsd 24(%esp), %xmm7
+; X32-NEXT: calll *___guard_check_icall_fptr
+; X32-NEXT: movaps %xmm4, %xmm0
+; X32-NEXT: movaps %xmm5, %xmm1
+; X32-NEXT: movaps %xmm6, %xmm2
+; X32-NEXT: movaps %xmm7, %xmm3
+; X32-NEXT: calll *%ecx
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
entry:
%2 = alloca %struct.HVA, align 8
%3 = bitcast %struct.HVA* %2 to i8*
call x86_vectorcallcc void %0(%struct.HVA inreg %5)
ret void
- ; X32-LABEL: func_cf_vector_x86
- ; X32: movl 12(%ebp), %eax
- ; X32: movl 8(%ebp), %ecx
- ; X32: movups (%eax), %xmm0
- ; X32: movups 16(%eax), %xmm1
- ; X32: movaps %xmm0, (%esp)
- ; X32: movaps %xmm1, 16(%esp)
- ; X32: movsd (%esp), %xmm4
- ; X32: movsd 8(%esp), %xmm5
- ; X32: movsd 16(%esp), %xmm6
- ; X32: movsd 24(%esp), %xmm7
- ; X32: calll *___guard_check_icall_fptr
- ; X32: movaps %xmm4, %xmm0
- ; X32: movaps %xmm5, %xmm1
- ; X32: movaps %xmm6, %xmm2
- ; X32: movaps %xmm7, %xmm3
- ; X32: calll *%ecx
}
attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
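; For reference, the guard checks above are only inserted for modules that opt
; into Control Flow Guard. A minimal sketch of such a module (the function name
; here is hypothetical, and the flag value of 2 is assumed to request check
; instrumentation rather than table-only emission):

define void @indirect_call_sketch(void ()* %fp) {
entry:
  ; With a Windows triple such as i686-pc-windows-msvc, this indirect call is
  ; lowered to a check through ___guard_check_icall_fptr followed by the call,
  ; as in the X32 checks above.
  call void %fp()
  ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 2, !"cfguard", i32 2}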
define x86_fp80 @test17(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test17:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: flds {{\.LCPI.*}}
; SSE-NEXT: fxch %st(1)
-; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fcmovnbe %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test18(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test18:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: flds {{\.LCPI.*}}
; SSE-NEXT: fxch %st(1)
-; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fcmovnb %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test19(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test19:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: flds {{\.LCPI.*}}
; SSE-NEXT: fxch %st(1)
-; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fcmovb %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test20(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test20:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: flds {{\.LCPI.*}}
; SSE-NEXT: fxch %st(1)
-; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fcmovbe %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test21(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test21:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE-NEXT: flds {{\.LCPI.*}}
-; SSE-NEXT: fxch %st(1)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: setg %al
; SSE-NEXT: testb %al, %al
+; SSE-NEXT: flds {{\.LCPI.*}}
+; SSE-NEXT: fxch %st(1)
; SSE-NEXT: fcmovne %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test22(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test22:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE-NEXT: flds {{\.LCPI.*}}
-; SSE-NEXT: fxch %st(1)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: setge %al
; SSE-NEXT: testb %al, %al
+; SSE-NEXT: flds {{\.LCPI.*}}
+; SSE-NEXT: fxch %st(1)
; SSE-NEXT: fcmovne %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test23(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test23:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE-NEXT: flds {{\.LCPI.*}}
-; SSE-NEXT: fxch %st(1)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: setl %al
; SSE-NEXT: testb %al, %al
+; SSE-NEXT: flds {{\.LCPI.*}}
+; SSE-NEXT: fxch %st(1)
; SSE-NEXT: fcmovne %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
define x86_fp80 @test24(i32 %a, i32 %b, x86_fp80 %x) nounwind {
; SSE-LABEL: test24:
; SSE: # %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE-NEXT: flds {{\.LCPI.*}}
-; SSE-NEXT: fxch %st(1)
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: setle %al
; SSE-NEXT: testb %al, %al
+; SSE-NEXT: flds {{\.LCPI.*}}
+; SSE-NEXT: fxch %st(1)
; SSE-NEXT: fcmovne %st(1), %st
; SSE-NEXT: fstp %st(1)
; SSE-NEXT: retl
-; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s
-; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s --check-prefix=PENTIUM4
+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s --check-prefix=PENTIUM4
; RUN: llc < %s -mtriple=i386 -mcpu=pentium-m | FileCheck %s
; RUN: llc < %s -mtriple=i386 -mcpu=prescott | FileCheck %s
; RUN: llc < %s -mtriple=i386 -mcpu=nocona | FileCheck %s
; happens during the post-RA-scheduler, which should be enabled by
; default with the above specified cpus.
+; Pentium4 is the default 32-bit CPU on Linux and currently has the postRA
+; scheduler disabled. Leaving the command lines in place in case we change that.
+
@ptrs = external global [0 x i32*], align 4
@idxa = common global i32 0, align 4
@idxb = common global i32 0, align 4
@res = common global i32 0, align 4
define void @addindirect() {
+; PENTIUM4-LABEL: addindirect:
+; PENTIUM4: # %bb.0: # %entry
+; PENTIUM4-NEXT: movl idxa, %eax
+; PENTIUM4-NEXT: movl ptrs(,%eax,4), %eax
+; PENTIUM4-NEXT: movl idxb, %ecx
+; PENTIUM4-NEXT: movl ptrs(,%ecx,4), %ecx
+; PENTIUM4-NEXT: movl (%ecx), %ecx
+; PENTIUM4-NEXT: addl (%eax), %ecx
+; PENTIUM4-NEXT: movl %ecx, res
+; PENTIUM4-NEXT: retl
+;
; CHECK-LABEL: addindirect:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl idxb, %ecx
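; Both blocks of checks above encode the same computation, res = *ptrs[idxa] +
; *ptrs[idxb]; they differ only in how the loads are interleaved, presumably
; because the post-RA scheduler runs for the CHECK cpus but stays disabled for
; pentium4, per the note above. A rough IR sketch of that computation follows
; (hypothetical value names, not the test's exact body):

define void @addindirect_sketch() {
entry:
  %ia = load i32, i32* @idxa, align 4
  %pa = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %ia
  %a = load i32*, i32** %pa, align 4
  %ib = load i32, i32* @idxb, align 4
  %pb = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %ib
  %b = load i32*, i32** %pb, align 4
  %va = load i32, i32* %a, align 4
  %vb = load i32, i32* %b, align 4
  %sum = add i32 %vb, %va
  store i32 %sum, i32* @res, align 4
  ret void
}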
%struct.Buffer = type { i8*, i32 }
; This test checks that the load %2, whose value is stored back after the 0xCD
; fill, is not dropped (a reduced sketch of the pattern follows the CHECK lines).
-;
+;
define i32 @pr34088() local_unnamed_addr {
; CHECK-LABEL: pr34088:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: subl $32, %esp
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205]
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movaps %xmm0, (%esp)
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205]
; CHECK-NEXT: movaps %xmm1, (%esp)
+; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: .cfi_def_cfa %esp, 4
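; The movsd load above reads the buffer before the 0xCD fill and its value is
; written back by the later movsd store, so it cannot be eliminated. A reduced,
; hypothetical sketch of that load/fill/store-back pattern (not the test's
; actual body):

define i64 @keep_load_sketch(i64* %p) {
entry:
  %saved = load i64, i64* %p, align 8    ; the load that must survive
  %raw = bitcast i64* %p to i8*
  call void @llvm.memset.p0i8.i32(i8* align 8 %raw, i8 -51, i32 8, i1 false)
  store i64 %saved, i64* %p, align 8     ; writes the pre-fill bytes back
  ret i64 %saved
}

declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)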
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: divss {{\.LCPI.*}}, %xmm0
; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss %xmm0, %xmm1
; CHECK-NEXT: setae %cl
; CHECK-NEXT: ucomiss {{\.LCPI.*}}, %xmm0
; CHECK: subl $20, %esp
; CHECK: .cv_fpo_stackalloc 20
; CHECK: .cv_fpo_endprologue
+; CHECK: movl 28(%esp), %esi
; CHECK: ___security_cookie
-; CHECK: movl 28(%esp), %esi
; CHECK: movl %esi, {{[0-9]*}}(%esp)
; CHECK: movl %esi, {{[0-9]*}}(%esp)
; CHECK: movl %esi, {{[0-9]*}}(%esp)
; CHECK: addl $20, %esp
; CHECK: popl %esi
; CHECK: retl
-; CHECK: Ltmp3:
+; CHECK: Ltmp2:
; CHECK: .cv_fpo_endproc
; ModuleID = 't.c'