From e7ed95a24d005410ee3fb772c97189f43dadaee7 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 17 Sep 2010 21:07:09 +0000 Subject: [PATCH] Pad short functions with NOPs for Atom. gcc/ 2010-09-17 H.J. Lu Richard Henderson * config/i386/i386.c (initial_ix86_tune_features): Add X86_TUNE_PAD_SHORT_FUNCTION. (ix86_code_end): Pad with 8 NOPs for TARGET_PAD_SHORT_FUNCTION. (ix86_count_insn): New. (ix86_pad_short_function): Likewise. (ix86_reorg): Support TARGET_PAD_SHORT_FUNCTION. * config/i386/i386.h (ix86_tune_indices): Add X86_TUNE_PAD_SHORT_FUNCTION. (TARGET_PAD_SHORT_FUNCTION): New. * config/i386/i386.md (UNSPEC_NOPS): New. (nops): Likewise. gcc/testsuite/ 2010-09-17 H.J. Lu * gcc.target/i386/pad-1.c: New. * gcc.target/i386/pad-2.c: Likewise. * gcc.target/i386/pad-3.c: Likewise. * gcc.target/i386/pad-4.c: Likewise. * gcc.target/i386/pad-5a.c: Likewise. * gcc.target/i386/pad-5b.c: Likewise. * gcc.target/i386/pad-6a.c: Likewise. * gcc.target/i386/pad-6b.c: Likewise. * gcc.target/i386/pad-7.c: Likewise. * gcc.target/i386/pad-8.c: Likewise. * gcc.target/i386/pad-9.c: Likewise. * gcc.target/i386/pad-10.c: Likewise. 
Co-Authored-By: Richard Henderson From-SVN: r164379 --- gcc/ChangeLog | 17 +++++ gcc/config/i386/i386.c | 126 ++++++++++++++++++++++++++++++++- gcc/config/i386/i386.h | 3 + gcc/config/i386/i386.md | 34 +++++++++ gcc/testsuite/ChangeLog | 15 ++++ gcc/testsuite/gcc.target/i386/pad-1.c | 9 +++ gcc/testsuite/gcc.target/i386/pad-10.c | 18 +++++ gcc/testsuite/gcc.target/i386/pad-2.c | 9 +++ gcc/testsuite/gcc.target/i386/pad-3.c | 15 ++++ gcc/testsuite/gcc.target/i386/pad-4.c | 13 ++++ gcc/testsuite/gcc.target/i386/pad-5a.c | 12 ++++ gcc/testsuite/gcc.target/i386/pad-5b.c | 12 ++++ gcc/testsuite/gcc.target/i386/pad-6a.c | 12 ++++ gcc/testsuite/gcc.target/i386/pad-6b.c | 12 ++++ gcc/testsuite/gcc.target/i386/pad-7.c | 11 +++ gcc/testsuite/gcc.target/i386/pad-8.c | 11 +++ gcc/testsuite/gcc.target/i386/pad-9.c | 15 ++++ 17 files changed, 343 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pad-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-10.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-5a.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-5b.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-6a.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-6b.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-7.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-8.c create mode 100644 gcc/testsuite/gcc.target/i386/pad-9.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5301e8d..dcfc7e6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,4 +1,21 @@ 2010-09-17 H.J. Lu + Richard Henderson + + * config/i386/i386.c (initial_ix86_tune_features): Add + X86_TUNE_PAD_SHORT_FUNCTION. + (ix86_code_end): Pad with 8 NOPs for TARGET_PAD_SHORT_FUNCTION. + (ix86_count_insn): New. + (ix86_pad_short_function): Likewise. 
+ (ix86_reorg): Support TARGET_PAD_SHORT_FUNCTION. + + * config/i386/i386.h (ix86_tune_indices): Add + X86_TUNE_PAD_SHORT_FUNCTION. + (TARGET_PAD_SHORT_FUNCTION): New. + + * config/i386/i386.md (UNSPEC_NOPS): New. + (nops): Likewise. + +2010-09-17 H.J. Lu PR middle-end/45234 * calls.c (expand_call): Make sure that all variable sized diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index abec057..d9f9237 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1576,6 +1576,9 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_PAD_RETURNS */ m_AMD_MULTIPLE | m_CORE2 | m_GENERIC, + /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */ + m_ATOM, + /* X86_TUNE_EXT_80387_CONSTANTS */ m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC, @@ -8021,6 +8024,11 @@ ix86_code_end (void) xops[0] = gen_rtx_REG (Pmode, regno); xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); + /* Pad stack IP move with 4 instructions. 2 NOPs count as 1 + instruction. */ + if (TARGET_PAD_SHORT_FUNCTION) + output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop", + xops); output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); output_asm_insn ("ret", xops); final_end_function (); @@ -27882,6 +27890,120 @@ ix86_pad_returns (void) } } +/* Count the minimum number of instructions in BB. Return 4 if the + number of instructions >= 4. */ + +static int +ix86_count_insn_bb (basic_block bb) +{ + rtx insn; + int insn_count = 0; + + /* Count number of instructions in this block. Return 4 if the number + of instructions >= 4. */ + FOR_BB_INSNS (bb, insn) + { + /* Only happens in exit blocks. 
*/ + if (JUMP_P (insn) + && GET_CODE (PATTERN (insn)) == RETURN) + break; + + if (NONDEBUG_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + { + insn_count++; + if (insn_count >= 4) + return insn_count; + } + } + + return insn_count; +} + + +/* Count the minimum number of instructions in code path in BB. + Return 4 if the number of instructions >= 4. */ + +static int +ix86_count_insn (basic_block bb) +{ + edge e; + edge_iterator ei; + int min_prev_count; + + /* Only bother counting instructions along paths with no + more than 2 basic blocks between entry and exit. Given + that BB has an edge to exit, determine if a predecessor + of BB has an edge from entry. If so, compute the number + of instructions in the predecessor block. If there + happen to be multiple such blocks, compute the minimum. */ + min_prev_count = 4; + FOR_EACH_EDGE (e, ei, bb->preds) + { + edge prev_e; + edge_iterator prev_ei; + + if (e->src == ENTRY_BLOCK_PTR) + { + min_prev_count = 0; + break; + } + FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) + { + if (prev_e->src == ENTRY_BLOCK_PTR) + { + int count = ix86_count_insn_bb (e->src); + if (count < min_prev_count) + min_prev_count = count; + break; + } + } + } + + if (min_prev_count < 4) + min_prev_count += ix86_count_insn_bb (bb); + + return min_prev_count; +} + +/* Pad short function to 4 instructions. */ + +static void +ix86_pad_short_function (void) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + { + rtx ret = BB_END (e->src); + if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN) + { + int insn_count = ix86_count_insn (e->src); + + /* Pad short function. */ + if (insn_count < 4) + { + rtx insn = ret; + + /* Find epilogue. */ + while (insn + && (!NOTE_P (insn) + || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) + insn = PREV_INSN (insn); + + if (!insn) + insn = ret; + + /* Two NOPs are counted as one instruction. 
*/ + insn_count = 2 * (4 - insn_count); + emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); + } + } + } +} + /* Implement machine specific optimizations. We implement padding of returns for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ static void @@ -27889,7 +28011,9 @@ ix86_reorg (void) { if (optimize && optimize_function_for_speed_p (cfun)) { - if (TARGET_PAD_RETURNS) + if (TARGET_PAD_SHORT_FUNCTION) + ix86_pad_short_function (); + else if (TARGET_PAD_RETURNS) ix86_pad_returns (); #ifdef ASM_OUTPUT_MAX_SKIP_PAD if (TARGET_FOUR_JUMP_LIMIT) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 22dd02b..aa246c6 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -299,6 +299,7 @@ enum ix86_tune_indices { X86_TUNE_USE_BT, X86_TUNE_USE_INCDEC, X86_TUNE_PAD_RETURNS, + X86_TUNE_PAD_SHORT_FUNCTION, X86_TUNE_EXT_80387_CONSTANTS, X86_TUNE_SHORTEN_X87_SSE, X86_TUNE_AVOID_VECTOR_DECODE, @@ -385,6 +386,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] #define TARGET_PAD_RETURNS ix86_tune_features[X86_TUNE_PAD_RETURNS] +#define TARGET_PAD_SHORT_FUNCTION \ + ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION] #define TARGET_EXT_80387_CONSTANTS \ ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS] #define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE] diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ec43793..4ccd932 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -104,6 +104,7 @@ UNSPEC_LD_MPIC ; load_macho_picbase UNSPEC_TRUNC_NOOP UNSPEC_DIV_ALREADY_SPLIT + UNSPEC_NOPS ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -11465,6 +11466,39 @@ (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) +;; Generate nops. Operand 0 is the number of nops, up to 8. 
+(define_insn "nops" + [(unspec [(match_operand 0 "const_int_operand" "")] + UNSPEC_NOPS)] + "reload_completed" +{ + switch (INTVAL (operands[0])) + { + case 1: + return "nop"; + case 2: + return "nop; nop"; + case 3: + return "nop; nop; nop"; + case 4: + return "nop; nop; nop; nop"; + case 5: + return "nop; nop; nop; nop; nop"; + case 6: + return "nop; nop; nop; nop; nop; nop"; + case 7: + return "nop; nop; nop; nop; nop; nop; nop"; + case 8: + return "nop; nop; nop; nop; nop; nop; nop; nop"; + default: + gcc_unreachable (); + break; + } +} + [(set (attr "length") (symbol_ref "INTVAL (operands[0])")) + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + ;; Pad to 16-byte boundary, max skip in op0. Used to avoid ;; branch prediction penalty for the third jump in a 16-byte ;; block on K8. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6bec743..f1193f9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,20 @@ 2010-09-17 H.J. Lu + * gcc.target/i386/pad-1.c: New. + * gcc.target/i386/pad-2.c: Likewise. + * gcc.target/i386/pad-3.c: Likewise. + * gcc.target/i386/pad-4.c: Likewise. + * gcc.target/i386/pad-5a.c: Likewise. + * gcc.target/i386/pad-5b.c: Likewise. + * gcc.target/i386/pad-6a.c: Likewise. + * gcc.target/i386/pad-6b.c: Likewise. + * gcc.target/i386/pad-7.c: Likewise. + * gcc.target/i386/pad-8.c: Likewise. + * gcc.target/i386/pad-9.c: Likewise. + * gcc.target/i386/pad-10.c: Likewise. + +2010-09-17 H.J. Lu + PR middle-end/45234 * gcc.dg/torture/stackalign/alloca-5.c: New. 
diff --git a/gcc/testsuite/gcc.target/i386/pad-1.c b/gcc/testsuite/gcc.target/i386/pad-1.c new file mode 100644 index 0000000..87a9d6c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fomit-frame-pointer -mtune=generic -S" } */ +/* { dg-final { scan-assembler "rep" } } */ +/* { dg-final { scan-assembler-not "nop" } } */ + +void +foo () +{ +} diff --git a/gcc/testsuite/gcc.target/i386/pad-10.c b/gcc/testsuite/gcc.target/i386/pad-10.c new file mode 100644 index 0000000..6ba3b78 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-10.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-not "nop" } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +extern void bar (); + +int +foo2 (int z, int x) +{ + if (x == 1) + { + bar (); + return z; + } + else + return x + z; +} diff --git a/gcc/testsuite/gcc.target/i386/pad-2.c b/gcc/testsuite/gcc.target/i386/pad-2.c new file mode 100644 index 0000000..964547c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +void +foo () +{ +} diff --git a/gcc/testsuite/gcc.target/i386/pad-3.c b/gcc/testsuite/gcc.target/i386/pad-3.c new file mode 100644 index 0000000..52442b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-not "nop" } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +int s[8] = {1, 2, 3, 4, 5, 6, 7, 8}; +int d[8] = {11, 22, 33, 44, 55, 66, 77, 88}; + +void +foo () +{ + int i; + for (i = 0; i < 8; i++) + d[i] = s[i] + 0x1000; +} diff --git 
a/gcc/testsuite/gcc.target/i386/pad-4.c b/gcc/testsuite/gcc.target/i386/pad-4.c new file mode 100644 index 0000000..a7033fa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S -fPIC" } */ +/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +extern int bar; + +int +foo () +{ + return bar; +} diff --git a/gcc/testsuite/gcc.target/i386/pad-5a.c b/gcc/testsuite/gcc.target/i386/pad-5a.c new file mode 100644 index 0000000..9d0aa2a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-5a.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-times "nop; nop" 1 } } */ +/* { dg-final { scan-assembler-not "nop; nop; nop" } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +int +foo (int x, int y, int z) +{ + return x + y + z; +} diff --git a/gcc/testsuite/gcc.target/i386/pad-5b.c b/gcc/testsuite/gcc.target/i386/pad-5b.c new file mode 100644 index 0000000..2e1cf12 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-5b.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */ +/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +int +foo (int x, int y, int z) +{ + return x + y + z; +} diff --git a/gcc/testsuite/gcc.target/i386/pad-6a.c b/gcc/testsuite/gcc.target/i386/pad-6a.c new file mode 100644 index 0000000..e865967 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-6a.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O2 
-fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */ +/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +int +foo (int x, int y) +{ + return x + y; +} diff --git a/gcc/testsuite/gcc.target/i386/pad-6b.c b/gcc/testsuite/gcc.target/i386/pad-6b.c new file mode 100644 index 0000000..41aeaee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-6b.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */ +/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +int +foo (int x, int y) +{ + return x + y; +} diff --git a/gcc/testsuite/gcc.target/i386/pad-7.c b/gcc/testsuite/gcc.target/i386/pad-7.c new file mode 100644 index 0000000..7a7493d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-7.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-not "nop" } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +int +foo (int x, int y, int z) +{ + return x + y + z + y; +} diff --git a/gcc/testsuite/gcc.target/i386/pad-8.c b/gcc/testsuite/gcc.target/i386/pad-8.c new file mode 100644 index 0000000..873a0a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-8.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */ +/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +int +foo (int x, int y) +{ + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pad-9.c 
b/gcc/testsuite/gcc.target/i386/pad-9.c new file mode 100644 index 0000000..3d68805 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pad-9.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */ +/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */ +/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */ +/* { dg-final { scan-assembler-not "rep" } } */ + +extern void bar (void); + +void +foo (int x) +{ + if (x) + bar (); +} -- 2.7.4