From 890b9b966c9b155b36cf2811be8a289eeb0d9b96 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 30 Oct 2009 07:32:26 -0700 Subject: [PATCH] Optimize -mstackrealign. gcc/ 2009-10-30 H.J. Lu PR target/40838 * cfgexpand.c (expand_stack_alignment): Call update_stack_boundary first. Move assert on stack_alignment_estimated just before setting stack_realign_needed. (gimple_expand_cfg): Initialize stack_alignment_estimated to 0. Don't call update_stack_boundary. * config/i386/i386.c (ix86_minimum_incoming_stack_boundary): New. (override_options): Don't check ix86_force_align_arg_pointer here. (ix86_function_ok_for_sibcall): Use it. (ix86_update_stack_boundary): Likewise. * config/i386/i386.h (STACK_REALIGN_DEFAULT): Update comments. gcc/testsuite/ 2009-10-30 H.J. Lu PR target/40838 * gcc.target/i386/incoming-6.c: New. * gcc.target/i386/incoming-7.c: Likewise. * gcc.target/i386/incoming-8.c: Likewise. * gcc.target/i386/incoming-9.c: Likewise. * gcc.target/i386/incoming-10.c: Likewise. * gcc.target/i386/incoming-11.c: Likewise. * gcc.target/i386/incoming-12.c: Likewise. * gcc.target/i386/incoming-13.c: Likewise. * gcc.target/i386/incoming-14.c: Likewise. * gcc.target/i386/incoming-15.c: Likewise. * gcc.target/i386/pr37843-4.c: Likewise. 
From-SVN: r153750 --- gcc/ChangeLog | 22 +++++++++-- gcc/cfgexpand.c | 36 ++++++++---------- gcc/config/i386/i386.c | 57 ++++++++++++++++++++--------- gcc/config/i386/i386.h | 4 +- gcc/testsuite/ChangeLog | 15 ++++++++ gcc/testsuite/gcc.target/i386/incoming-10.c | 19 ++++++++++ gcc/testsuite/gcc.target/i386/incoming-11.c | 18 +++++++++ gcc/testsuite/gcc.target/i386/incoming-12.c | 20 ++++++++++ gcc/testsuite/gcc.target/i386/incoming-13.c | 15 ++++++++ gcc/testsuite/gcc.target/i386/incoming-14.c | 15 ++++++++ gcc/testsuite/gcc.target/i386/incoming-15.c | 15 ++++++++ gcc/testsuite/gcc.target/i386/incoming-6.c | 17 +++++++++ gcc/testsuite/gcc.target/i386/incoming-7.c | 16 ++++++++ gcc/testsuite/gcc.target/i386/incoming-8.c | 18 +++++++++ gcc/testsuite/gcc.target/i386/incoming-9.c | 18 +++++++++ gcc/testsuite/gcc.target/i386/pr37843-4.c | 13 +++++++ 16 files changed, 274 insertions(+), 44 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/incoming-10.c create mode 100644 gcc/testsuite/gcc.target/i386/incoming-11.c create mode 100644 gcc/testsuite/gcc.target/i386/incoming-12.c create mode 100644 gcc/testsuite/gcc.target/i386/incoming-13.c create mode 100644 gcc/testsuite/gcc.target/i386/incoming-14.c create mode 100644 gcc/testsuite/gcc.target/i386/incoming-15.c create mode 100644 gcc/testsuite/gcc.target/i386/incoming-6.c create mode 100644 gcc/testsuite/gcc.target/i386/incoming-7.c create mode 100644 gcc/testsuite/gcc.target/i386/incoming-8.c create mode 100644 gcc/testsuite/gcc.target/i386/incoming-9.c create mode 100644 gcc/testsuite/gcc.target/i386/pr37843-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c060b6f..af36bc6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2009-10-30 H.J. Lu + + PR target/40838 + * cfgexpand.c (expand_stack_alignment): Call update_stack_boundary + first. Move assert on stack_alignment_estimated just before + setting stack_realign_needed. + (gimple_expand_cfg): Initialize stack_alignment_estimated to 0. 
+ Don't call update_stack_boundary. + + * config/i386/i386.c (ix86_minimum_incoming_stack_boundary): New. + (override_options): Don't check ix86_force_align_arg_pointer here. + (ix86_function_ok_for_sibcall): Use it. + (ix86_update_stack_boundary): Likewise. + + * config/i386/i386.h (STACK_REALIGN_DEFAULT): Update comments. + 2009-10-30 Richard Earnshaw * arm.md (QHSI): New mode iterator. @@ -13,9 +29,9 @@ PR debug/41700 * dwarf2out.c (dwarf2_debug_hooks): Add entries for new hook (two locations in the source). - (store_vcall_insn): New function. - (lookup_vcall_insn): New function. - (dwarf2out_virtual_call_token): Use store_vcall_insn. + (store_vcall_insn): New function. + (lookup_vcall_insn): New function. + (dwarf2out_virtual_call_token): Use store_vcall_insn. (dwarf2out_copy_call_info): New function. (dwarf2out_virtual_call): Use lookup_vcall_insn. * emit-rtl.c (try_split): Call copy_call_info debug hook. diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index fdc4de5..33e7579 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -3447,8 +3447,18 @@ expand_stack_alignment (void) || crtl->has_nonlocal_goto) crtl->need_drap = true; - gcc_assert (crtl->stack_alignment_needed - <= crtl->stack_alignment_estimated); + /* Call update_stack_boundary here again to update incoming stack + boundary. It may set incoming stack alignment to a different + value after RTL expansion. TARGET_FUNCTION_OK_FOR_SIBCALL may + use the minimum incoming stack alignment to check if it is OK + to perform sibcall optimization since sibcall optimization will + only align the outgoing stack to incoming stack boundary. */ + if (targetm.calls.update_stack_boundary) + targetm.calls.update_stack_boundary (); + + /* The incoming stack frame has to be aligned at least at + parm_stack_boundary. */ + gcc_assert (crtl->parm_stack_boundary <= INCOMING_STACK_BOUNDARY); /* Update crtl->stack_alignment_estimated and use it later to align stack. 
We check PREFERRED_STACK_BOUNDARY if there may be non-call @@ -3464,6 +3474,9 @@ expand_stack_alignment (void) if (preferred_stack_boundary > crtl->stack_alignment_needed) crtl->stack_alignment_needed = preferred_stack_boundary; + gcc_assert (crtl->stack_alignment_needed + <= crtl->stack_alignment_estimated); + crtl->stack_realign_needed = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated; crtl->stack_realign_tried = crtl->stack_realign_needed; @@ -3540,7 +3553,7 @@ gimple_expand_cfg (void) targetm.expand_to_rtl_hook (); crtl->stack_alignment_needed = STACK_BOUNDARY; crtl->max_used_stack_slot_alignment = STACK_BOUNDARY; - crtl->stack_alignment_estimated = STACK_BOUNDARY; + crtl->stack_alignment_estimated = 0; crtl->preferred_stack_boundary = STACK_BOUNDARY; cfun->cfg->max_jumptable_ents = 0; @@ -3604,23 +3617,6 @@ gimple_expand_cfg (void) if (crtl->stack_protect_guard) stack_protect_prologue (); - /* Update stack boundary if needed. */ - if (SUPPORTS_STACK_ALIGNMENT) - { - /* Call update_stack_boundary here to update incoming stack - boundary before TARGET_FUNCTION_OK_FOR_SIBCALL is called. - TARGET_FUNCTION_OK_FOR_SIBCALL needs to know the accurate - incoming stack alignment to check if it is OK to perform - sibcall optimization since sibcall optimization will only - align the outgoing stack to incoming stack boundary. */ - if (targetm.calls.update_stack_boundary) - targetm.calls.update_stack_boundary (); - - /* The incoming stack frame has to be aligned at least at - parm_stack_boundary. */ - gcc_assert (crtl->parm_stack_boundary <= INCOMING_STACK_BOUNDARY); - } - expand_phi_nodes (&SA); /* Register rtl specific functions for cfg. 
*/ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 349df7a..5d6eb0f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1905,6 +1905,7 @@ static bool ix86_valid_target_attribute_p (tree, tree, tree, int); static bool ix86_valid_target_attribute_inner_p (tree, char *[]); static bool ix86_can_inline_p (tree, tree); static void ix86_set_current_function (tree); +static unsigned int ix86_minimum_incoming_stack_boundary (bool); static enum calling_abi ix86_function_abi (const_tree); @@ -3239,12 +3240,10 @@ override_options (bool main_args_p) if (ix86_force_align_arg_pointer == -1) ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; + ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; + /* Validate -mincoming-stack-boundary= value or default it to MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ - if (ix86_force_align_arg_pointer) - ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY; - else - ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; if (ix86_incoming_stack_boundary_string) { @@ -4277,7 +4276,8 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) /* If we need to align the outgoing stack, then sibcalling would unalign the stack, which may break the called function. */ - if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY) + if (ix86_minimum_incoming_stack_boundary (true) + < PREFERRED_STACK_BOUNDARY) return false; if (decl) @@ -8196,37 +8196,58 @@ find_drap_reg (void) } } -/* Update incoming stack boundary and estimated stack alignment. */ +/* Return minimum incoming stack alignment. */ -static void -ix86_update_stack_boundary (void) +static unsigned int +ix86_minimum_incoming_stack_boundary (bool sibcall) { + unsigned int incoming_stack_boundary; + /* Prefer the one specified at command line. */ - ix86_incoming_stack_boundary - = (ix86_user_incoming_stack_boundary - ? 
ix86_user_incoming_stack_boundary - : ix86_default_incoming_stack_boundary); + if (ix86_user_incoming_stack_boundary) + incoming_stack_boundary = ix86_user_incoming_stack_boundary; + /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary + if -mstackrealign is used, it isn't used for sibcall check and + estimated stack alignment is 128bit. */ + else if (!sibcall + && !TARGET_64BIT + && ix86_force_align_arg_pointer + && crtl->stack_alignment_estimated == 128) + incoming_stack_boundary = MIN_STACK_BOUNDARY; + else + incoming_stack_boundary = ix86_default_incoming_stack_boundary; /* Incoming stack alignment can be changed on individual functions via force_align_arg_pointer attribute. We use the smallest incoming stack boundary. */ - if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY + if (incoming_stack_boundary > MIN_STACK_BOUNDARY && lookup_attribute (ix86_force_align_arg_pointer_string, TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) - ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY; + incoming_stack_boundary = MIN_STACK_BOUNDARY; /* The incoming stack frame has to be aligned at least at parm_stack_boundary. */ - if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary) - ix86_incoming_stack_boundary = crtl->parm_stack_boundary; + if (incoming_stack_boundary < crtl->parm_stack_boundary) + incoming_stack_boundary = crtl->parm_stack_boundary; /* Stack at entrance of main is aligned by runtime. We use the smallest incoming stack boundary. */ - if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY + if (incoming_stack_boundary > MAIN_STACK_BOUNDARY && DECL_NAME (current_function_decl) && MAIN_NAME_P (DECL_NAME (current_function_decl)) && DECL_FILE_SCOPE_P (current_function_decl)) - ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY; + incoming_stack_boundary = MAIN_STACK_BOUNDARY; + + return incoming_stack_boundary; +} + +/* Update incoming stack boundary and estimated stack alignment. 
*/ + +static void +ix86_update_stack_boundary (void) +{ + ix86_incoming_stack_boundary + = ix86_minimum_incoming_stack_boundary (false); /* x86_64 vararg needs 16byte stack alignment for register save area. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 33a5077..22187a9 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -706,9 +706,7 @@ enum target_cpu_default generate an alternate prologue and epilogue that realigns the runtime stack if nessary. This supports mixing codes that keep a 4-byte aligned stack, as specified by i386 psABI, with codes that - need a 16-byte aligned stack, as required by SSE instructions. If - STACK_REALIGN_DEFAULT is 1 and PREFERRED_STACK_BOUNDARY_DEFAULT is - 128, stacks for all functions may be realigned. */ + need a 16-byte aligned stack, as required by SSE instructions. */ #define STACK_REALIGN_DEFAULT 0 /* Boundary (in *bits*) on which the incoming stack is aligned. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0df4a2f..29676c6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +2009-10-30 H.J. Lu + + PR target/40838 + * gcc.target/i386/incoming-6.c: New. + * gcc.target/i386/incoming-7.c: Likewise. + * gcc.target/i386/incoming-8.c: Likewise. + * gcc.target/i386/incoming-9.c: Likewise. + * gcc.target/i386/incoming-10.c: Likewise. + * gcc.target/i386/incoming-11.c: Likewise. + * gcc.target/i386/incoming-12.c: Likewise. + * gcc.target/i386/incoming-13.c: Likewise. + * gcc.target/i386/incoming-14.c: Likewise. + * gcc.target/i386/incoming-15.c: Likewise. + * gcc.target/i386/pr37843-4.c: Likewise. + 2009-10-30 Dodji Seketeli PR c++/41863 diff --git a/gcc/testsuite/gcc.target/i386/incoming-10.c b/gcc/testsuite/gcc.target/i386/incoming-10.c new file mode 100644 index 0000000..31d9e61 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-10.c @@ -0,0 +1,19 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! 
*-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -fomit-frame-pointer -O3 -march=barcelona -mpreferred-stack-boundary=4" } */ + +struct s { + int x[8]; +}; + +void g(struct s *); + +void f() +{ + int i; + struct s s; + for (i = 0; i < sizeof(s.x) / sizeof(*s.x); i++) s.x[i] = 0; + g(&s); +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c new file mode 100644 index 0000000..e5787af --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-11.c @@ -0,0 +1,18 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -fomit-frame-pointer -O3 -march=barcelona -mpreferred-stack-boundary=4" } */ + +void g(); + +int p[100]; +int q[100]; + +void f() +{ + int i; + for (i = 0; i < 100; i++) p[i] = 0; + g(); + for (i = 0; i < 100; i++) q[i] = 0; +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/incoming-12.c b/gcc/testsuite/gcc.target/i386/incoming-12.c new file mode 100644 index 0000000..d7ef103 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-12.c @@ -0,0 +1,20 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -O2 -msse2 -mpreferred-stack-boundary=4" } */ + +typedef int v4si __attribute__ ((vector_size (16))); + +struct x { + v4si v; + v4si w; +}; + +void y(void *); + +v4si x(void) +{ + struct x x; + y(&x); +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/incoming-13.c b/gcc/testsuite/gcc.target/i386/incoming-13.c new file mode 100644 index 0000000..bbc8993 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-13.c @@ -0,0 +1,15 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! 
*-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -O2 -mpreferred-stack-boundary=4" } */ + +extern double y(double *s3); + +extern double s1, s2; + +double x(void) +{ + double s3 = s1 + s2; + return y(&s3); +} + +/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/incoming-14.c b/gcc/testsuite/gcc.target/i386/incoming-14.c new file mode 100644 index 0000000..d27179d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-14.c @@ -0,0 +1,15 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -O2 -mpreferred-stack-boundary=4" } */ + +extern int y(int *s3); + +extern int s1, s2; + +int x(void) +{ + int s3 = s1 + s2; + return y(&s3); +} + +/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/incoming-15.c b/gcc/testsuite/gcc.target/i386/incoming-15.c new file mode 100644 index 0000000..e6a1749 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-15.c @@ -0,0 +1,15 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -O2 -mpreferred-stack-boundary=4" } */ + +extern long long y(long long *s3); + +extern long long s1, s2; + +long long x(void) +{ + long long s3 = s1 + s2; + return y(&s3); +} + +/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/incoming-6.c b/gcc/testsuite/gcc.target/i386/incoming-6.c new file mode 100644 index 0000000..5cc4ab3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-6.c @@ -0,0 +1,17 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! 
*-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -O2 -msse2 -mpreferred-stack-boundary=4" } */ + +typedef int v4si __attribute__ ((vector_size (16))); + +extern v4si y(v4si *s3); + +extern v4si s1, s2; + +v4si x(void) +{ + v4si s3 = s1 + s2; + return y(&s3); +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/incoming-7.c b/gcc/testsuite/gcc.target/i386/incoming-7.c new file mode 100644 index 0000000..cdd6037 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-7.c @@ -0,0 +1,16 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -O2 -msse2 -mpreferred-stack-boundary=4" } */ + +typedef int v4si __attribute__ ((vector_size (16))); + +extern v4si y(v4si, v4si, v4si, v4si, v4si); + +extern v4si s1, s2; + +v4si x(void) +{ + return y(s1, s2, s1, s2, s2); +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/incoming-8.c b/gcc/testsuite/gcc.target/i386/incoming-8.c new file mode 100644 index 0000000..2dd8800 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-8.c @@ -0,0 +1,18 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -O3 -msse2 -mpreferred-stack-boundary=4" } */ + +float +foo (float f) +{ + float array[128]; + float x; + int i; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + array[i] = f; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + x += array[i]; + return x; +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/incoming-9.c b/gcc/testsuite/gcc.target/i386/incoming-9.c new file mode 100644 index 0000000..e43cbd6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/incoming-9.c @@ -0,0 +1,18 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! 
*-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -mstackrealign -O3 -mno-sse -mpreferred-stack-boundary=4" } */ + +float +foo (float f) +{ + float array[128]; + float x; + int i; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + array[i] = f; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + x += array[i]; + return x; +} + +/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr37843-4.c b/gcc/testsuite/gcc.target/i386/pr37843-4.c new file mode 100644 index 0000000..8e5f51f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr37843-4.c @@ -0,0 +1,13 @@ +/* Test for stack alignment with sibcall optimization. */ +/* { dg-do compile { target { ilp32 && nonpic } } } */ +/* { dg-options "-O2 -msse2 -mpreferred-stack-boundary=4 -mstackrealign" } */ +/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%\[re\]?sp" } } */ +/* { dg-final { scan-assembler-not "call\[\\t \]*foo" } } */ +/* { dg-final { scan-assembler "jmp\[\\t \]*foo" } } */ + +extern int foo (void); + +int bar (void) +{ + return foo(); +} -- 2.7.4