From: Teresa Johnson Date: Fri, 6 Apr 2012 05:03:49 +0000 (+0000) Subject: Avoid instructions that incur expensive length-changing prefix (LCP) stalls on some... X-Git-Tag: upstream/12.2.0~77030 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7b38ee83df3686a974d84c17da96101239de47bb;p=platform%2Fupstream%2Fgcc.git Avoid instructions that incur expensive length-changing prefix (LCP) stalls on some x86-64 implementations... Avoid instructions that incur expensive length-changing prefix (LCP) stalls on some x86-64 implementations, notably Core2 and Corei7. Specifically, a move of a 16-bit constant into memory requires a length-changing prefix and can incur significant penalties. Modified an old patch written by H.J. Lu to split such instructions during peephole2. 2012-04-05 Teresa Johnson H.J. Lu * config/i386/i386.h (ix86_tune_indices): Add X86_TUNE_LCP_STALL. * config/i386/i386.md (move immediate to memory peephole2): Add cases for HImode move when LCP stall avoidance is needed. * config/i386/i386.c (initial_ix86_tune_features): Initialize X86_TUNE_LCP_STALL entry. Co-Authored-By: H.J. Lu From-SVN: r186176 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0df25cf..8118ed5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2012-04-05 Teresa Johnson + H.J. Lu + + * config/i386/i386.h (ix86_tune_indices): Add + X86_TUNE_LCP_STALL. + * config/i386/i386.md (move immediate to memory peephole2): + Add cases for HImode move when LCP stall avoidance is needed. + * config/i386/i386.c (initial_ix86_tune_features): Initialize + X86_TUNE_LCP_STALL entry. 
+ 2012-04-05 Uros Bizjak PR target/52882 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c959113..8974ddc 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1964,6 +1964,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_PARTIAL_FLAG_REG_STALL */ m_CORE2I7 | m_GENERIC, + /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall + * on 16-bit immediate moves into memory on Core2 and Corei7. */ + m_CORE2I7 | m_GENERIC, + /* X86_TUNE_USE_HIMODE_FIOP */ m_386 | m_486 | m_K6_GEODE, diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7ba90c7..8942ea8 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -262,6 +262,7 @@ enum ix86_tune_indices { X86_TUNE_MOVX, X86_TUNE_PARTIAL_REG_STALL, X86_TUNE_PARTIAL_FLAG_REG_STALL, + X86_TUNE_LCP_STALL, X86_TUNE_USE_HIMODE_FIOP, X86_TUNE_USE_SIMODE_FIOP, X86_TUNE_USE_MOV0, @@ -340,6 +341,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL] #define TARGET_PARTIAL_FLAG_REG_STALL \ ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL] +#define TARGET_LCP_STALL \ + ix86_tune_features[X86_TUNE_LCP_STALL] #define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP] #define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP] #define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0] diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 2d20a52..dd1f9be 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16971,15 +16971,17 @@ (set (match_dup 0) (match_dup 2))]) ;; Don't move an immediate directly to memory when the instruction -;; gets too big. +;; gets too big, or if LCP stalls are a problem for 16-bit moves. 
(define_peephole2 [(match_scratch:SWI124 1 "") (set (match_operand:SWI124 0 "memory_operand") (const_int 0))] "optimize_insn_for_speed_p () - && !TARGET_USE_MOV0 - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn + && ((mode == HImode + && TARGET_LCP_STALL) + || (!TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn)) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 2) (const_int 0)) (clobber (reg:CC FLAGS_REG))]) @@ -16991,8 +16993,10 @@ (set (match_operand:SWI124 0 "memory_operand") (match_operand:SWI124 1 "immediate_operand"))] "optimize_insn_for_speed_p () - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" + && ((mode == HImode + && TARGET_LCP_STALL) + || (TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))])