From: razya Date: Thu, 4 May 2006 09:19:02 +0000 (+0000) Subject: 2006-05-04 Leehod Baruch X-Git-Tag: upstream/4.9.2~54710 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2b74c150e7f74609e313b1be10302126ca9f5e33;p=platform%2Fupstream%2Flinaro-gcc.git 2006-05-04 Leehod Baruch * see.c: New file. * Makefile.in (OBJS-common): Add see.o. (see.o): Add dependencies. * common.opt (fsee): New flag for the see optimization was added. * opts.c (flag_see): Initialized. * passes.c (init_optimization_passes, pass_see): New pass. * rtl.h (see_main): Declaration as extern. * timevar.def (TV_SEE): New. * tree-pass.h (pass_see): Declaration as extern. * invoke.texi (-fsee): Document. * recog.c (validate_simplify_insn): New function. * recog.h (validate_simplify_insn): Declaration as extern. * df-problems.c (df_chain_dump): Check for NULL. 2006-05-04 Kenneth Zadeck Daniel Berlin * cfgrtl.c (insert_insn_bb_end_new): New function. * basic-block.h (insert_insn_bb_end_new): Declaration as extern. 2006-05-04 Leehod Baruch * df.h (struct web_entry): Moved from web.c. (union_defs): Declaration as extern. (unionfind_root): Likewise. (unionfind_union): Likewise. * web.c (struct web_entry): Moved to df.h. (unionfind_root): Remove static declaration. (unionfind_union): Likewise. (union_defs): Likewise and generalize to use callback function. (web_main): Update arguments for union_defs function call. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@113518 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fda813a..d3195ac 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,37 @@ +2006-05-04 Leehod Baruch + + * see.c: New file. + * Makefile.in (OBJS-common): Add see.o. + (see.o): Add dependencies. + * common.opt (fsee): New flag for the see optimization was added. + * opts.c (flag_see): Initialized. + * passes.c (init_optimization_passes, pass_see): New pass. + * rtl.h (see_main): Declaration as extern. + * timevar.def (TV_SEE): New. + * tree-pass.h (pass_see): Declaration as extern. + * invoke.texi (-fsee): Document. + * recog.c (validate_simplify_insn): New function. + * recog.h (validate_simplify_insn): Declaration as extern. + * df-problems.c (df_chain_dump): Check for NULL. + +2006-05-04 Kenneth Zadeck + Daniel Berlin + + * cfgrtl.c (insert_insn_bb_end_new): New function. + * basic-block.h (insert_insn_bb_end_new): Declaration as extern. + +2006-05-04 Leehod Baruch + + * df.h (struct web_entry): Moved from web.c. + (union_defs): Declaration as extern. + (unionfind_root): Likewise. + (unionfind_union): Likewise. + * web.c (struct web_entry): Moved to df.h. + (unionfind_root): Remove static declaration. + (unionfind_union): Likewise. + (union_defs): Likewise and generalize to use callback function. + (web_main): Update arguments for union_defs function call. + 2006-05-04 Richard Guenther PR tree-optimization/26447 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 4239808..807380a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -995,7 +995,7 @@ OBJS-common = \ real.o recog.o reg-stack.o regclass.o regmove.o regrename.o \ reload.o reload1.o reorg.o resource.o rtl.o rtlanal.o rtl-error.o \ sbitmap.o sched-deps.o sched-ebb.o sched-rgn.o sched-vis.o sdbout.o \ - simplify-rtx.o sreal.o stmt.o stor-layout.o stringpool.o \ + see.o simplify-rtx.o sreal.o stmt.o stor-layout.o stringpool.o \ struct-equiv.o targhooks.o timevar.o toplev.o tracer.o tree.o tree-dump.o \ varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \ et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o \ @@ -2314,6 +2314,9 @@ cse.o : cse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \ web.o : web.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ hard-reg-set.h $(FLAGS_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h toplev.h \ $(DF_H) $(OBSTACK_H) timevar.h tree-pass.h +see.o : see.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ + hard-reg-set.h $(FLAGS_H) $(BASIC_BLOCK_H) function.h output.h toplev.h \ + $(DF_H) $(OBSTACK_H) timevar.h tree-pass.h gcse.o : gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(REGS_H) hard-reg-set.h $(FLAGS_H) $(REAL_H) insn-config.h $(GGC_H) \ $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h toplev.h \ diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 87683fe..21ed892 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -1001,6 +1001,8 @@ extern basic_block get_bb_original (basic_block); extern void set_bb_copy (basic_block, basic_block); extern basic_block get_bb_copy (basic_block); +extern rtx insert_insn_end_bb_new (rtx, basic_block); + #include "cfghooks.h" /* In struct-equiv.c */ diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c index 026b37e..3afbee4 100644 --- a/gcc/cfgrtl.c +++ b/gcc/cfgrtl.c @@ -2899,6 +2899,93 @@ init_rtl_bb_info (basic_block bb) } +/* Add EXPR to the end of basic block BB. */ + +rtx +insert_insn_end_bb_new (rtx pat, basic_block bb) +{ + rtx insn = BB_END (bb); + rtx new_insn; + rtx pat_end = pat; + + while (NEXT_INSN (pat_end) != NULL_RTX) + pat_end = NEXT_INSN (pat_end); + + /* If the last insn is a jump, insert EXPR in front [taking care to + handle cc0, etc. properly]. Similarly we need to care trapping + instructions in presence of non-call exceptions. */ + + if (JUMP_P (insn) + || (NONJUMP_INSN_P (insn) + && (!single_succ_p (bb) + || single_succ_edge (bb)->flags & EDGE_ABNORMAL))) + { +#ifdef HAVE_cc0 + rtx note; +#endif + /* If this is a jump table, then we can't insert stuff here. Since + we know the previous real insn must be the tablejump, we insert + the new instruction just before the tablejump. */ + if (GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + insn = prev_real_insn (insn); + +#ifdef HAVE_cc0 + /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts + if cc0 isn't set. */ + note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX); + if (note) + insn = XEXP (note, 0); + else + { + rtx maybe_cc0_setter = prev_nonnote_insn (insn); + if (maybe_cc0_setter + && INSN_P (maybe_cc0_setter) + && sets_cc0_p (PATTERN (maybe_cc0_setter))) + insn = maybe_cc0_setter; + } +#endif + /* FIXME: What if something in cc0/jump uses value set in new + insn? */ + new_insn = emit_insn_before_noloc (pat, insn); + } + + /* Likewise if the last insn is a call, as will happen in the presence + of exception handling. */ + else if (CALL_P (insn) + && (!single_succ_p (bb) + || single_succ_edge (bb)->flags & EDGE_ABNORMAL)) + { + /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers, + we search backward and place the instructions before the first + parameter is loaded. Do this for everyone for consistency and a + presumption that we'll get better code elsewhere as well. */ + + /* Since different machines initialize their parameter registers + in different orders, assume nothing. Collect the set of all + parameter registers. */ + insn = find_first_parameter_load (insn, BB_HEAD (bb)); + + /* If we found all the parameter loads, then we want to insert + before the first parameter load. + + If we did not find all the parameter loads, then we might have + stopped on the head of the block, which could be a CODE_LABEL. + If we inserted before the CODE_LABEL, then we would be putting + the insn in the wrong basic block. In that case, put the insn + after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */ + while (LABEL_P (insn) + || NOTE_INSN_BASIC_BLOCK_P (insn)) + insn = NEXT_INSN (insn); + + new_insn = emit_insn_before_noloc (pat, insn); + } + else + new_insn = emit_insn_after_noloc (pat, insn); + + return new_insn; +} + /* Implementation of CFG manipulation for linearized RTL. */ struct cfg_hooks rtl_cfg_hooks = { "rtl", @@ -2973,4 +3060,3 @@ struct cfg_hooks cfg_layout_rtl_cfg_hooks = { rtl_extract_cond_bb_edges, /* extract_cond_bb_edges */ NULL /* flush_pending_stmts */ }; - diff --git a/gcc/common.opt b/gcc/common.opt index 53836f3..9d9cbca 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -809,6 +809,10 @@ frtl-abstract-sequences Common Report Var(flag_rtl_seqabstr) Perform sequence abstraction optimization on RTL +fsee +Common Report Var(flag_see) Init(0) +Eliminate redundant sign extensions using LCM. + fshow-column Common C ObjC C++ ObjC++ Report Var(flag_show_column) Init(1) Show column numbers in diagnostics, when available. Default on diff --git a/gcc/df-problems.c b/gcc/df-problems.c index 8dd3446..4f5f2e1 100644 --- a/gcc/df-problems.c +++ b/gcc/df-problems.c @@ -2992,7 +2992,9 @@ df_chains_dump (struct dataflow *dflow, FILE *file) { fprintf (file, "d%d bb %d luid %d insn %d reg %d ", j, DF_REF_BBNO (def), - DF_INSN_LUID (df, DF_REF_INSN (def)), + DF_REF_INSN (def) ? + DF_INSN_LUID (df, DF_REF_INSN (def)): + -1, DF_REF_INSN (def) ? DF_REF_INSN_UID (def) : -1, DF_REF_REGNO (def)); if (def->flags & DF_REF_READ_WRITE) diff --git a/gcc/df.h b/gcc/df.h index 720bbfa..3a735ae 100644 --- a/gcc/df.h +++ b/gcc/df.h @@ -593,4 +593,23 @@ extern void df_hard_reg_init (void); extern bool df_read_modify_subreg_p (rtx); +/* web */ + +/* This entry is allocated for each reference in the insn stream. */ +struct web_entry +{ + /* Pointer to the parent in the union/find tree. */ + struct web_entry *pred; + /* Newly assigned register to the entry. Set only for roots. */ + rtx reg; + void* extra_info; +}; + +extern struct web_entry *unionfind_root (struct web_entry *); +extern bool unionfind_union (struct web_entry *, struct web_entry *); +extern void union_defs (struct df *, struct df_ref *, + struct web_entry *, struct web_entry *, + bool (*fun) (struct web_entry *, struct web_entry *)); + + #endif /* GCC_DF_H */ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 3e523d4..71c6925 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -334,7 +334,7 @@ Objective-C and Objective-C++ Dialects}. -fsched-spec-load-dangerous @gol -fsched-stalled-insns=@var{n} -fsched-stalled-insns-dep=@var{n} @gol -fsched2-use-superblocks @gol --fsched2-use-traces -freschedule-modulo-scheduled-loops @gol +-fsched2-use-traces -fsee -freschedule-modulo-scheduled-loops @gol -fsection-anchors -fsignaling-nans -fsingle-precision-constant @gol -fstack-protector -fstack-protector-all @gol -fstrict-aliasing -ftracer -fthread-jumps @gol @@ -5008,6 +5008,12 @@ match the reality and hurt the performance. This only makes sense when scheduling after register allocation, i.e.@: with @option{-fschedule-insns2} or at @option{-O2} or higher. +@item -fsee +@opindex fsee +Eliminates redundant extension instructions and move the non redundant +ones to optimal placement using LCM. +Enabled at level @option{-O3}. + @item -freschedule-modulo-scheduled-loops @opindex fscheduling-in-modulo-scheduled-loops The modulo scheduling comes before the traditional scheduling, if a loop was modulo scheduled diff --git a/gcc/opts.c b/gcc/opts.c index 00b9716..6220018 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -592,6 +592,7 @@ decode_options (unsigned int argc, const char **argv) flag_inline_functions = 1; flag_unswitch_loops = 1; flag_gcse_after_reload = 1; + flag_see = 1; } if (optimize < 2 || optimize_size) diff --git a/gcc/passes.c b/gcc/passes.c index 0ca4cf5..fe6f637 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -652,6 +652,7 @@ init_optimization_passes (void) NEXT_PASS (pass_regmove); NEXT_PASS (pass_split_all_insns); NEXT_PASS (pass_mode_switching); + NEXT_PASS (pass_see); NEXT_PASS (pass_recompute_reg_usage); NEXT_PASS (pass_sms); NEXT_PASS (pass_sched); diff --git a/gcc/recog.c b/gcc/recog.c index 342699f..cd1cb7d 100644 --- a/gcc/recog.c +++ b/gcc/recog.c @@ -698,6 +698,46 @@ validate_replace_src_group (rtx from, rtx to, rtx insn) d.insn = insn; note_uses (&PATTERN (insn), validate_replace_src_1, &d); } + +/* Try simplify INSN. + Invoke simplify_rtx () on every SET_SRC and SET_DEST inside the INSN's + pattern and return true if something was simplified. */ + +bool +validate_simplify_insn (rtx insn) +{ + int i; + rtx pat = NULL; + rtx newpat = NULL; + + pat = PATTERN (insn); + + if (GET_CODE (pat) == SET) + { + newpat = simplify_rtx (SET_SRC (pat)); + if (newpat && !rtx_equal_p (SET_SRC (pat), newpat)) + validate_change (insn, &SET_SRC (pat), newpat, 1); + newpat = simplify_rtx (SET_DEST (pat)); + if (newpat && !rtx_equal_p (SET_DEST (pat), newpat)) + validate_change (insn, &SET_DEST (pat), newpat, 1); + } + else if (GET_CODE (pat) == PARALLEL) + for (i = 0; i < XVECLEN (pat, 0); i++) + { + rtx s = XVECEXP (pat, 0, i); + + if (GET_CODE (XVECEXP (pat, 0, i)) == SET) + { + newpat = simplify_rtx (SET_SRC (s)); + if (newpat && !rtx_equal_p (SET_SRC (s), newpat)) + validate_change (insn, &SET_SRC (s), newpat, 1); + newpat = simplify_rtx (SET_DEST (s)); + if (newpat && !rtx_equal_p (SET_DEST (s), newpat)) + validate_change (insn, &SET_DEST (s), newpat, 1); + } + } + return ((num_changes_pending () > 0) && (apply_change_group () > 0)); +} #ifdef HAVE_cc0 /* Return 1 if the insn using CC0 set by INSN does not contain diff --git a/gcc/recog.h b/gcc/recog.h index 4e1ac2b..8281a9e 100644 --- a/gcc/recog.h +++ b/gcc/recog.h @@ -87,6 +87,7 @@ extern int strict_memory_address_p (enum machine_mode, rtx); extern int validate_replace_rtx (rtx, rtx, rtx); extern void validate_replace_rtx_group (rtx, rtx, rtx); extern void validate_replace_src_group (rtx, rtx, rtx); +extern bool validate_simplify_insn (rtx insn); extern int num_changes_pending (void); #ifdef HAVE_cc0 extern int next_insn_tests_no_inequality (rtx); diff --git a/gcc/see.c b/gcc/see.c new file mode 100644 index 0000000..603d72e --- /dev/null +++ b/gcc/see.c @@ -0,0 +1,3782 @@ +/* Sign extension elimination optimization for GNU compiler. + Copyright (C) 2005 Free Software Foundation, Inc. + Contributed by Leehod Baruch + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +-Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. + +Problem description: +-------------------- +In order to support 32bit computations on a 64bit machine, sign +extension instructions are generated to ensure the correctness of +the computation. +A possible policy (as currently implemented) is to generate a sign +extension right after each 32bit computation. +Depending on the instruction set of the architecture, some of these +sign extension instructions may be redundant. +There are two cases in which the extension may be redundant: + +Case1: +The instruction that uses the 64bit operands that are sign +extended has a dual mode that works with 32bit operands. +For example: + + int32 a, b; + + a = .... --> a = .... + a = sign extend a --> + b = .... --> b = .... + b = sign extend a --> + --> + cmpd a, b --> cmpw a, b //half word compare + +Case2: +The instruction that defines the 64bit operand (which is later sign +extended) has a dual mode that defines and sign-extends simultaneously +a 32bit operand. For example: + + int32 a; + + ld a --> lwa a // load half word and sign extend + a = sign extend a --> + --> + return a --> return a + + +General idea for solution: +-------------------------- +First, try to merge the sign extension with the instruction that +defines the source of the extension and (separately) with the +instructions that uses the extended result. By doing this, both cases +of redundancies (as described above) will be eliminated. + +Then, use partial redundancy elimination to place the non redundant +ones at optimal placements. + + +Implementation by example: +-------------------------- +Note: The instruction stream is not changed till the last phase. + +Phase 0: Initial code, as currently generated by gcc. + + def1 def3 + se1 def2 se3 + | \ | / | + | \ | / | + | \ | / | + | \ | / | + | \ | / | + | \|/ | + use1 use2 use3 + use4 +def1 + se1: +set ((reg:SI 10) (..def1rhs..)) +set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) + +def2: +set ((reg:DI 100) (const_int 7)) + +def3 + se3: +set ((reg:SI 20) (..def3rhs..)) +set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) + +use1: +set ((reg:CC...) (compare:CC (reg:DI 100) (...))) + +use2, use3, use4: +set ((...) (reg:DI 100)) + +Phase 1: Propagate extensions to uses. + + def1 def3 + se1 def2 se3 + | \ | / | + | \ | / | + | \ | / | + | \ | / | + | \ | / | + | \|/ | + se se se + use1 use2 use3 + se + use4 + +From here, all of the subregs are lowpart ! + +def1, def2, def3: No change. + +use1: +set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) +set ((reg:CC...) (compare:CC (reg:DI 100) (...))) + +use2, use3, use4: +set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) +set ((...) (reg:DI 100)) + + +Phase 2: Merge and eliminate locally redundant extensions. + + + *def1 def2 *def3 + [se removed] se se3 + | \ | / | + | \ | / | + | \ | / | + | \ | / | + | \ | / | + | \|/ | + [se removed] se se + *use1 use2 use3 + [se removed] + use4 + +The instructions that were changed at this phase are marked with +asterisk. + +*def1: Merge failed. + Remove the sign extension instruction, modify def1 and + insert a move instruction to assure to correctness of the code. +set ((subreg:SI (reg:DI 100)) (..def1rhs..)) +set ((reg:SI 10) (subreg:SI (reg:DI 100))) + +def2 + se: There is no need for merge. + Def2 is not changed but a sign extension instruction is + created. +set ((reg:DI 100) (const_int 7)) +set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) + +*def3 + se3: Merge succeeded. +set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) +set ((reg:SI 20) (reg:DI 100)) +set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) +(The extension instruction is the original one). + +*use1: Merge succeeded. Remove the sign extension instruction. +set ((reg:CC...) + (compare:CC (subreg:SI (reg:DI 100)) (...))) + +use2, use3: Merge failed. No change. + +use4: The extension is locally redundant, therefore it is eliminated + at this point. + + +Phase 3: Eliminate globally redundant extensions. + +Following the LCM output: + + def1 def2 def3 + se se3 + | \ | / | + | \ | / | + | se | / | + | \ | / | + | \ | / | + | \|/ | + [ses removed] + use1 use2 use3 + use4 + +se: +set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) + +se3: +set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) + + +Phase 4: Commit changes to the insn stream. + + + def1 def3 *def1 def2 *def3 + se1 def2 se3 [se removed] [se removed] + | \ | / | | \ | / | + | \ | / | ------> | \ | / | + | \ | / | ------> | se | / | + | \ | / | | \ | / | + | \ | / | | \ | / | + | \|/ | | \|/ | + use1 use2 use3 *use1 use2 use3 + use4 use4 + +The instructions that were changed during the whole optimization are +marked with asterisk. + +The result: + +def1 + se1: +[ set ((reg:SI 10) (..def1rhs..)) ] - Deleted +[ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted +set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted +set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted + +def2: +set ((reg:DI 100) (const_int 7)) - No change + +def3 + se3: +[ set ((reg:SI 20) (..def3rhs..)) ] - Deleted +[ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted +set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted +set ((reg:SI 20) (reg:DI 100)) - Inserted + +use1: +[ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted +set ((reg:CC...) - Inserted + (compare:CC (subreg:SI (reg:DI 100)) (...))) + +use2, use3, use4: +set ((...) (reg:DI 100)) - No change + +se: - Inserted +set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) + +Note: Most of the simple move instructions that were inserted will be + trivially dead and therefore eliminated. + +The implementation outline: +--------------------------- +Some definitions: + A web is RELEVANT if at the end of phase 1, his leader's + relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of + the web is the source_mode of his leader. + A definition is a candidate for the optimization if it is part + of a RELEVANT web and his local source_mode is not narrower + then the source_mode of its web. + A use is a candidate for the optimization if it is part of a + RELEVANT web. + A simple explicit extension is a single set instruction that + extends a register (or a subregister) to a register (or + subregister). + A complex explicit extension is an explicit extension instruction + that is not simple. + A def extension is a simple explicit extension that is + also a candidate for the optimization. This extension is part + of the instruction stream, it is not generated by this + optimization. + A use extension is a simple explicit extension that is generated + and stored for candidate use during this optimization. It is + not emitted to the instruction stream till the last phase of + the optimization. + A reference is an instruction that satisfy at least on of these + criteria: + - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web. + - It is followed by a def extension. + - It contains a candidate use. + +Phase 1: Propagate extensions to uses. + In this phase, we find candidate extensions for the optimization + and we generate (but not emit) proper extensions "right before the + uses". + + a. Build a DF object. + b. Traverse over all the instructions that contains a definition + and set their local relevancy and local source_mode like this: + - If the instruction is a simple explicit extension instruction, + mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension + type and mark its source_mode to be the mode of the quantity + that is been extended. + - Otherwise, If the instruction has an implicit extension, + which means that its high part is an extension of its low part, + or if it is a complicated explicit extension, mark it as + EXTENDED_DEF and set its source_mode to be the narrowest + mode that is been extended in the instruction. + c. Traverse over all the instructions that contains a use and set + their local relevancy to RELEVANT_USE (except for few corner + cases). + d. Produce the web. During union of two entries, update the + relevancy and source_mode of the leader. There are two major + guide lines for this update: + - If one of the entries is NOT_RELEVANT, mark the leader + NOT_RELEVANT. + - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF + (or vice versa) mark the leader as NOT_RELEVANT. We don't + handle this kind of mixed webs. + (For more details about this update process, + see see_update_leader_extra_info ()). + e. Generate uses extensions according to the relevancy and + source_mode of the webs. + +Phase 2: Merge and eliminate locally redundant extensions. + In this phase, we try to merge def extensions and use + extensions with their references, and eliminate redundant extensions + in the same basic block. + + Traverse over all the references. Do this in basic block number and + luid number forward order. + For each reference do: + a. Peephole optimization - try to merge it with all its + def extensions and use extensions in the following + order: + - Try to merge only the def extensions, one by one. + - Try to merge only the use extensions, one by one. + - Try to merge any couple of use extensions simultaneously. + - Try to merge any def extension with one or two uses + extensions simultaneously. + b. Handle each EXTENDED_DEF in it as if it was already merged with + an extension. + + During the merge process we save the following data for each + register in each basic block: + a. The first instruction that defines the register in the basic + block. + b. The last instruction that defines the register in the basic + block. + c. The first extension of this register before the first + instruction that defines it in the basic block. + c. The first extension of this register after the last + instruction that defines it in the basic block. + This data will help us eliminate (or more precisely, not generate) + locally redundant extensions, and will be useful in the next stage. + + While merging extensions with their reference there are 4 possible + situations: + a. A use extension was merged with the reference: + Delete the extension instruction and save the merged reference + for phase 4. (For details, see see_use_extension_merged ()) + b. A use extension failed to be merged with the reference: + If there is already such an extension in the same basic block + and it is not dead at this point, delete the unmerged extension + (it is locally redundant), otherwise properly update the above + basic block data. + (For details, see see_merge_one_use_extension ()) + c. A def extension was merged with the reference: + Mark this extension as a merged_def extension and properly + update the above basic block data. + (For details, see see_merge_one_def_extension ()) + d. A def extension failed to be merged with the reference: + Replace the definition of the NARROWmode register in the + reference with the proper subreg of WIDEmode register and save + the result as a merged reference. Also, properly update the + the above basic block data. + (For details, see see_def_extension_not_merged ()) + +Phase 3: Eliminate globally redundant extensions. +In this phase, we set the bit vectors input of the edge based LCM +using the recorded data on the registers in each basic block. +We also save pointers for all the anticipatable and available +occurrences of the relevant extensions. Then we run the LCM. + + a. Initialize the comp, antloc, kill bit vectors to zero and the + transp bit vector to ones. + + b. Traverse over all the references. Do this in basic block number + and luid number forward order. For each reference: + - Go over all its use extensions. For each such extension - + If it is not dead from the beginning of the basic block SET + the antloc bit of the current extension in the current + basic block bits. + If it is not dead till the end of the basic block SET the + comp bit of the current extension in the current basic + block bits. + - Go over all its def extensions that were merged with + it. For each such extension - + If it is not dead till the end of the basic block SET the + comp bit of the current extension in the current basic + block bits. + RESET the proper transp and kill bits. + - Go over all its def extensions that were not merged + with it. For each such extension - + RESET the transp bit and SET the kill bit of the current + extension in the current basic block bits. + + c. Run the edge based LCM. + +Phase 4: Commit changes to the insn stream. +This is the only phase that actually changes the instruction stream. +Up to this point the optimization could be aborted at any time. +Here we insert extensions at their best placements and delete the +redundant ones according to the output of the LCM. We also replace +some of the instructions according to the second phase merges results. + + a. Use the pre_delete_map (from the output of the LCM) in order to + delete redundant extensions. This will prevent them from been + emitted in the first place. + + b. Insert extensions on edges where needed according to + pre_insert_map and edge_list (from the output of the LCM). + + c. For each reference do- + - Emit all the uses extensions that were not deleted until now, + right before the reference. + - Delete all the merged and unmerged def extensions from + the instruction stream. + - Replace the reference with the merged one, if exist. + +The implementation consists of four data structures: +- Data structure I + Purpose: To handle the relevancy of the uses, definitions and webs. + Relevant structures: web_entry (from df.h), see_entry_extra_info. + Details: This is a disjoint-set data structure. Most of its functions are + implemented in web.c. Each definition and use in the code are + elements. A web_entry structure is allocated for each element to + hold the element's relevancy and source_mode. The union rules are + defined in see_update_leader_extra_info (). +- Data structure II + Purpose: To store references and their extensions (uses and defs) + and to enable traverse over these references according to basic + block order. + Relevant structure: see_ref_s. + Details: This data structure consists of an array of splay trees. One splay + tree for each basic block. The splay tree nodes are references and + the keys are the luids of the references. + A see_ref_s structure is allocated for each reference. It holds the + reference itself, its def and uses extensions and later the merged + version of the reference. + Using this data structure we can traverse over all the references of + a basic block and their extensions in forward order. +- Data structure III. + Purpose: To store local properties of registers for each basic block. + This data will later help us build the LCM sbitmap_vectors + input. + Relevant structure: see_register_properties. + Details: This data structure consists of an array of hash tables. One hash + for each basic block. The hash node are a register properties + and the keys are the numbers of the registers. + A see_register_properties structure is allocated for each register + that we might be interested in its properties. + Using this data structure we can easily find the properties of a + register in a specific basic block. This is necessary for locally + redundancy elimination and for setting up the LCM input. +- Data structure IV. + Purpose: To store the extensions that are candidate for PRE and their + anticipatable and available occurrences. + Relevant structure: see_occr, see_pre_extension_expr. + Details: This data structure is a hash tables. Its nodes are the extensions + that are candidate for PRE. + A see_pre_extension_expr structure is allocated for each candidate + extension. It holds a copy of the extension and a linked list of all + the anticipatable and available occurrences of it. + We use this data structure when we read the output of the LCM. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +#include "obstack.h" +#include "rtl.h" +#include "output.h" +#include "df.h" +#include "insn-config.h" +#include "recog.h" +#include "expr.h" +#include "splay-tree.h" +#include "hashtab.h" +#include "regs.h" +#include "timevar.h" +#include "tree-pass.h" + +void +see_main (void); + +/* Used to classify defs and uses according to relevancy. */ +enum entry_type { + NOT_RELEVANT, + SIGN_EXTENDED_DEF, + ZERO_EXTENDED_DEF, + EXTENDED_DEF, + RELEVANT_USE +}; + +/* Used to classify extensions in relevant webs. */ +enum extension_type { + DEF_EXTENSION, + EXPLICIT_DEF_EXTENSION, + IMPLICIT_DEF_EXTENSION, + USE_EXTENSION +}; + +/* Global data structures and flags. */ + +/* This structure will be assigned for each web_entry structure (defined + in df.h). It is placed in the extra_info field of a web_entry and holds the + relevancy and source mode of the web_entry. */ + +struct see_entry_extra_info +{ + /* The relevancy of the ref. */ + enum entry_type relevancy; + /* The relevancy of the ref. + This field is updated only once - when this structure is created. */ + enum entry_type local_relevancy; + /* The source register mode. */ + enum machine_mode source_mode; + /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF. + It is updated only once when this structure is created. */ + enum machine_mode local_source_mode; + /* This field is used only if the relevancy is EXTENDED_DEF. + It holds the narrowest mode that is sign extended. */ + enum machine_mode source_mode_signed; + /* This field is used only if the relevancy is EXTENDED_DEF. + It holds the narrowest mode that is zero extended. */ + enum machine_mode source_mode_unsigned; +}; + +/* There is one such structure for every reference. It stores the reference + itself as well as its extensions (uses and definitions). + Used as the value in splay_tree see_bb_splay_ar[]. */ +struct see_ref_s +{ + /* The luid of the insn. */ + unsigned int luid; + /* The insn of the ref. */ + rtx insn; + /* The merged insn that was formed from the reference's insn and extensions. + If all merges faile it remains NULL. */ + rtx merged_insn; + /* The def extensions of the reference that were not merged with + it. */ + htab_t unmerged_def_se_hash; + /* The def extensions of the reference that were merged with + it. Implicit extensions of the reference will be stored here too. */ + htab_t merged_def_se_hash; + /* The uses extensions of reference. */ + htab_t use_se_hash; +}; + +/* There is one such structure for every relevant extended register in a + specific basic block. This data will help us build the LCM sbitmap_vectors + input. */ +struct see_register_properties +{ + /* The register number. */ + unsigned int regno; + /* The last luid of the reference that defines this register in this basic + block. */ + int last_def; + /* The luid of the reference that has the first extension of this register + that appears before any definition in this basic block. */ + int first_se_before_any_def; + /* The luid of the reference that has the first extension of this register + that appears after the last definition in this basic block. */ + int first_se_after_last_def; +}; + +/* Occurrence of an expression. + There must be at most one available occurance and at most one anticipatable + occurrence per basic block. */ +struct see_occr +{ + /* Next occurrence of this expression. */ + struct see_occr *next; + /* The insn that computes the expression. */ + rtx insn; + int block_num; +}; + +/* There is one such structure for every relevant extension expression. + It holds a copy of this extension instruction as well as a linked lists of + pointers to all the antic and avail occurrences of it. */ +struct see_pre_extension_expr +{ + /* A copy of the extension instruction. */ + rtx se_insn; + /* Index in the available expression bitmaps. */ + int bitmap_index; + /* List of anticipatable occurrences in basic blocks in the function. + An "anticipatable occurrence" is the first occurrence in the basic block, + the operands are not modified in the basic block prior to the occurrence + and the output is not used between the start of the block and the + occurrence. */ + struct see_occr *antic_occr; + /* List of available occurrence in basic blocks in the function. + An "available occurrence" is the last occurrence in the basic block and + the operands are not modified by following statements in the basic block + [including this insn]. */ + struct see_occr *avail_occr; +}; + +/* Helper structure for the note_uses and see_replace_src functions. */ +struct see_replace_data +{ + rtx from; + rtx to; +}; + +/* Helper structure for the note_uses and see_mentioned_reg functions. */ +struct see_mentioned_reg_data +{ + rtx reg; + bool mentioned; +}; + +/* A data flow object that will be created once and used throughout the + optimization. */ +static struct df *df = NULL; +/* An array of web_entries. The i'th definition in the df object is associated + with def_entry[i] */ +static struct web_entry *def_entry = NULL; +/* An array of web_entries. The i'th use in the df object is associated with + use_entry[i] */ +static struct web_entry *use_entry = NULL; +/* Array of splay_trees. + see_bb_splay_ar[i] refers to the splay tree of the i'th basic block. + The splay tree will hold see_ref_s structures. The key is the luid + of the insn. This way we can traverse over the references of each basic + block in forward or backward order. */ +static splay_tree *see_bb_splay_ar = NULL; +/* Array of hashes. + see_bb_hash_ar[i] refers to the hash of the i'th basic block. + The hash will hold see_register_properties structure. The key is regno. */ +static htab_t *see_bb_hash_ar = NULL; +/* Hash table that holds a copy of all the extensions. The key is the right + hand side of the se_insn field. */ +static htab_t see_pre_extension_hash = NULL; + +/* Local LCM properties of expressions. */ +/* Nonzero for expressions that are transparent in the block. */ +static sbitmap *transp = NULL; +/* Nonzero for expressions that are computed (available) in the block. */ +static sbitmap *comp = NULL; +/* Nonzero for expressions that are locally anticipatable in the block. */ +static sbitmap *antloc = NULL; +/* Nonzero for expressions that are locally killed in the block. */ +static sbitmap *ae_kill = NULL; +/* Nonzero for expressions which should be inserted on a specific edge. */ +static sbitmap *pre_insert_map = NULL; +/* Nonzero for expressions which should be deleted in a specific block. */ +static sbitmap *pre_delete_map = NULL; +/* Contains the edge_list returned by pre_edge_lcm. */ +static struct edge_list *edge_list = NULL; +/* Records the last basic block at the beginning of the optimization. */ +static int last_bb; +/* Records the number of uses at the beginning of the optimization. */ +static unsigned int uses_num; +/* Records the number of definitions at the beginning of the optimization. */ +static unsigned int defs_num; + +#define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *)(ENTRY)->extra_info) + +/* Functions implementation. */ + +/* Verifies that EXTENSION's pattern is this: + + set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2)) + + If it doesn't have the expected pattern return NULL. + Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */ + +static rtx +see_get_extension_reg (rtx extension, bool return_dest_reg) +{ + rtx set = NULL; + rtx rhs = NULL; + rtx lhs = NULL; + rtx reg1 = NULL; + rtx reg2 = NULL; + + set = single_set (extension); + if (!set) + return NULL; + lhs = SET_DEST (set); + rhs = SET_SRC (set); + + if (REG_P (lhs)) + reg1 = lhs; + else if (REG_P (SUBREG_REG (lhs))) + reg1 = SUBREG_REG (lhs); + else + return NULL; + + if ((GET_CODE (rhs) != SIGN_EXTEND) && (GET_CODE (rhs) != ZERO_EXTEND)) + return NULL; + + rhs = XEXP (rhs, 0); + if (REG_P (rhs)) + reg2 = rhs; + else if (REG_P (SUBREG_REG (rhs))) + reg2 = SUBREG_REG (rhs); + else + return NULL; + + if (return_dest_reg) + return reg1; + return reg2; +} + +/* Verifies that EXTENSION's pattern is this: + + set (reg/subreg reg1) (sign/zero_extend: (...expr...) + + If it doesn't have the expected pattern return UNKNOWN. + Otherwise, set SOURCE_MODE to be the mode of the extended expr and return + the rtx code of the extension. */ + +static enum rtx_code +see_get_extension_data (rtx extension, enum machine_mode *source_mode) +{ + rtx rhs = NULL; + rtx lhs = NULL; + rtx set = NULL; + + if (!extension || !INSN_P (extension)) + return UNKNOWN; + + set = single_set (extension); + if (!set) + return NOT_RELEVANT; + rhs = SET_SRC (set); + lhs = SET_DEST (set); + + /* Don't handle extensions to something other then register or + subregister. */ + if (!REG_P (lhs) && !SUBREG_REG (lhs)) + return UNKNOWN; + + if ((GET_CODE (rhs) != SIGN_EXTEND) && (GET_CODE (rhs) != ZERO_EXTEND)) + return UNKNOWN; + + if (!REG_P (XEXP (rhs, 0)) + && !((GET_CODE (XEXP (rhs, 0)) == SUBREG) + && (REG_P (SUBREG_REG (XEXP (rhs, 0)))))) + return UNKNOWN; + + *source_mode = GET_MODE (XEXP (rhs, 0)); + + if (GET_CODE (rhs) == SIGN_EXTEND) + return SIGN_EXTEND; + else + return ZERO_EXTEND; +} + + +/* Generate instruction with the pattern: + set ((reg r) (sign/zero_extend (subreg:mode (reg r)))) + (the register r on both sides of the set is the same register). + And recognize it. + If the recognition failed, this is very bad, return NULL (This will abort + the entier optimization). + Otherwise, return the generated instruction. */ + +static rtx +see_gen_normalized_extension (rtx reg, enum rtx_code extension_code, + enum machine_mode mode) +{ + rtx subreg = NULL; + rtx extension = NULL; + rtx insn = NULL; + + if (!reg + || !REG_P (reg) + || ((extension_code != SIGN_EXTEND) && (extension_code != ZERO_EXTEND))) + return NULL; + + subreg = gen_lowpart_SUBREG (mode, reg); + if (extension_code == SIGN_EXTEND) + extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg); + else + extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg); + + start_sequence (); + emit_insn (gen_rtx_SET (VOIDmode, reg, extension)); + insn = get_insns (); + end_sequence (); + + if (insn_invalid_p (insn)) + /* Recognition failed, this is very bad for this optimization. + Abort the optimization. */ + return NULL; + return insn; +} + +/* Hashes and splay_trees related functions implementation. */ + +/* Helper functions for the pre_extension hash. + This kind of hash will hold see_pre_extension_expr structures. + + The key is the right hand side of the se_insn field. + Note that the se_insn is an expression that looks like: + + set ((reg:WIDEmode r1) (sign_extend:WIDEmode + (subreg:NARROWmode (reg:WIDEmode r2)))) */ + +/* Return TRUE if P1 has the same value in its rhs as P2. + Otherwise, return FALSE. + P1 and P2 are see_pre_extension_expr structures. */ + +static int +eq_descriptor_pre_extension (const void *p1, const void *p2) +{ + const struct see_pre_extension_expr *extension1 = p1; + const struct see_pre_extension_expr *extension2 = p2; + rtx set1 = single_set (extension1->se_insn); + rtx set2 = single_set (extension2->se_insn); + rtx rhs1 = NULL; + rtx rhs2 = NULL; + + gcc_assert (set1 && set2); + rhs1 = SET_SRC (set1); + rhs2 = SET_SRC (set2); + + return rtx_equal_p (rhs1, rhs2); +} + + +/* P is a see_pre_extension_expr struct, use the RHS of the se_insn field. + Note that the RHS is an expression that looks like this: + (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */ + +static hashval_t +hash_descriptor_pre_extension (const void *p) +{ + const struct see_pre_extension_expr *extension = p; + rtx set = single_set (extension->se_insn); + rtx rhs = NULL; + + gcc_assert (set); + rhs = SET_SRC (set); + + return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0); +} + + +/* Free the allocated memory of the current see_pre_extension_expr struct. + + It frees the two linked list of the occurrences structures. */ + +static void +hash_del_pre_extension (void *p) +{ + struct see_pre_extension_expr *extension = p; + struct see_occr *curr_occr = extension->antic_occr; + struct see_occr *next_occr = NULL; + + /* Free the linked list of the anticipatable occurrences. */ + while (curr_occr) + { + next_occr = curr_occr->next; + free (curr_occr); + curr_occr = next_occr; + } + + /* Free the linked list of the available occurrences. */ + curr_occr = extension->avail_occr; + while (curr_occr) + { + next_occr = curr_occr->next; + free (curr_occr); + curr_occr = next_occr; + } + + /* Free the see_pre_extension_expr structure itself. */ + free (extension); +} + + +/* Helper functions for the register_properties hash. + This kind of hash will hold see_register_properties structures. + + The value of the key is the regno field of the structure. */ + +/* Return TRUE if P1 has the same value in the regno field as P2. + Otherwise, return FALSE. + Where P1 and P2 are see_register_properties structures. */ + +static int +eq_descriptor_properties (const void *p1, const void *p2) +{ + const struct see_register_properties *curr_prop1 = p1; + const struct see_register_properties *curr_prop2 = p2; + + return (curr_prop1->regno == curr_prop2->regno); +} + + +/* P is a see_register_properties struct, use the register number in the + regno field. */ + +static hashval_t +hash_descriptor_properties (const void *p) +{ + const struct see_register_properties *curr_prop = p; + return curr_prop->regno; +} + + +/* Free the allocated memory of the current see_register_properties struct. */ +static void +hash_del_properties (void *p) +{ + struct see_register_properties *curr_prop = p; + free (curr_prop); +} + + +/* Helper functions for an extension hash. + This kind of hash will hold insns that look like: + + set ((reg:WIDEmode r1) (sign_extend:WIDEmode + (subreg:NARROWmode (reg:WIDEmode r2)))) + or + set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2))) + + The value of the key is (REGNO (reg:WIDEmode r1)) + It is posibble to search this hash in two ways: + 1. By a register rtx. The Value that is been compared to the keys is the + REGNO of it. + 2. By an insn with the above pattern. The Value that is been compared to + the keys is the REGNO of the reg on the lhs. */ + +/* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE. + Where P1 is an insn and P2 is an insn or a register. */ + +static int +eq_descriptor_extension (const void *p1, const void *p2) +{ + const rtx insn = (rtx) p1; + const rtx element = (rtx) p2; + rtx set1 = single_set (insn); + rtx set2 = NULL; + rtx dest_reg1 = NULL; + rtx dest_reg2 = NULL; + + gcc_assert (set1 && element && (REG_P (element) || INSN_P (element))); + + dest_reg1 = SET_DEST (set1); + + if (INSN_P (element)) + { + set2 = single_set (element); + dest_reg2 = SET_DEST (set2); + } + else + dest_reg2 = element; + + return (REGNO (dest_reg1) == REGNO (dest_reg2)); +} + + +/* If P is an insn, use the register number of its lhs + otherwise, P is a register, use its number. */ + +static hashval_t +hash_descriptor_extension (const void *p) +{ + const rtx r = (rtx) p; + rtx set = NULL; + rtx lhs = NULL; + + if (r && REG_P (r)) + return REGNO (r); + else + { + gcc_assert (r && INSN_P (r)); + set = single_set (r); + gcc_assert (set); + lhs = SET_DEST (set); + return REGNO (lhs); + } +} + + +/* Helper function for a see_bb_splay_ar[i] splay tree. + It frees all the allocated memory of a struct see_ref_s pointer. + + VALUE is the value of a splay tree node. */ + +static void +see_free_ref_s (splay_tree_value value) +{ + struct see_ref_s *ref_s = (struct see_ref_s *)value; + + if (ref_s->unmerged_def_se_hash) + htab_delete (ref_s->unmerged_def_se_hash); + if (ref_s->merged_def_se_hash) + htab_delete (ref_s->merged_def_se_hash); + if (ref_s->use_se_hash) + htab_delete (ref_s->use_se_hash); + free (ref_s); +} + + +/* Rest of the implementation. */ + +/* Search the extension hash for a suitable entry for EXTENSION. + TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION). + + If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the + extension hash. + + If a suitable entry was found, return the slot. Otherwise, store EXTENSION + in the hash and return NULL. */ + +static struct see_pre_extension_expr * +see_seek_pre_extension_expr (rtx extension, enum extension_type type) +{ + struct see_pre_extension_expr **slot_pre_exp = NULL; + struct see_pre_extension_expr temp_pre_exp; + rtx dest_extension_reg = see_get_extension_reg (extension, 1); + enum rtx_code extension_code; + enum machine_mode source_extension_mode; + + if (type == DEF_EXTENSION) + { + extension_code = see_get_extension_data (extension, + &source_extension_mode); + gcc_assert (extension_code != UNKNOWN); + extension = + see_gen_normalized_extension (dest_extension_reg, extension_code, + source_extension_mode); + } + temp_pre_exp.se_insn = extension; + slot_pre_exp = + (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash, + &temp_pre_exp, INSERT); + if (*slot_pre_exp == NULL) + /* This is the first time this extension instruction is encountered. Store + it in the hash. */ + { + (*slot_pre_exp) = xmalloc (sizeof (struct see_pre_extension_expr)); + (*slot_pre_exp)->se_insn = extension; + (*slot_pre_exp)->bitmap_index = + (htab_elements (see_pre_extension_hash) - 1); + (*slot_pre_exp)->antic_occr = NULL; + (*slot_pre_exp)->avail_occr = NULL; + return NULL; + } + return *slot_pre_exp; +} + + +/* This function defines how to update the extra_info of the web_entry. + + FIRST is the pointer of the extra_info of the first web_entry. + SECOND is the pointer of the extra_info of the second web_entry. + The first web_entry will be the predecessor (leader) of the second web_entry + after the union. + + Return true if FIRST and SECOND points to the same web entry structure and + nothing is done. Otherwise, return false. */ + +static bool +see_update_leader_extra_info (struct web_entry *first, struct web_entry *second) +{ + struct see_entry_extra_info *first_ei = NULL; + struct see_entry_extra_info *second_ei = NULL; + + first = unionfind_root (first); + second = unionfind_root (second); + + if (unionfind_union (first, second)) + return true; + + first_ei = (struct see_entry_extra_info *)first->extra_info; + second_ei = (struct see_entry_extra_info *)second->extra_info; + + gcc_assert (first_ei && second_ei); + + if (second_ei->relevancy == NOT_RELEVANT) + { + first_ei->relevancy = NOT_RELEVANT; + return false; + } + switch (first_ei->relevancy) + { + case NOT_RELEVANT: + return false; + case RELEVANT_USE: + switch (second_ei->relevancy) + { + case RELEVANT_USE: + return false; + case EXTENDED_DEF: + first_ei->relevancy = second_ei->relevancy; + first_ei->source_mode_signed = second_ei->source_mode_signed; + first_ei->source_mode_unsigned = second_ei->source_mode_unsigned; + return false; + case SIGN_EXTENDED_DEF: + case ZERO_EXTENDED_DEF: + first_ei->relevancy = second_ei->relevancy; + first_ei->source_mode = second_ei->source_mode; + return false; + default: + gcc_unreachable (); + } + case SIGN_EXTENDED_DEF: + switch (second_ei->relevancy) + { + case SIGN_EXTENDED_DEF: + /* The mode of the root should be the wider one in this case. */ + first_ei->source_mode = + (first_ei->source_mode > second_ei->source_mode) ? + first_ei->source_mode : second_ei->source_mode; + return false; + case RELEVANT_USE: + return false; + case ZERO_EXTENDED_DEF: + /* Don't mix webs with zero extend and sign extend. */ + first_ei->relevancy = NOT_RELEVANT; + return false; + case EXTENDED_DEF: + if (second_ei->source_mode_signed == MAX_MACHINE_MODE) + first_ei->relevancy = NOT_RELEVANT; + else + /* The mode of the root should be the wider one in this case. */ + first_ei->source_mode = + (first_ei->source_mode > second_ei->source_mode_signed) ? + first_ei->source_mode : second_ei->source_mode_signed; + return false; + default: + gcc_unreachable (); + } + /* This case is similar to the previous one, with little changes. */ + case ZERO_EXTENDED_DEF: + switch (second_ei->relevancy) + { + case SIGN_EXTENDED_DEF: + /* Don't mix webs with zero extend and sign extend. */ + first_ei->relevancy = NOT_RELEVANT; + return false; + case RELEVANT_USE: + return false; + case ZERO_EXTENDED_DEF: + /* The mode of the root should be the wider one in this case. */ + first_ei->source_mode = + (first_ei->source_mode > second_ei->source_mode) ? + first_ei->source_mode : second_ei->source_mode; + return false; + case EXTENDED_DEF: + if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE) + first_ei->relevancy = NOT_RELEVANT; + else + /* The mode of the root should be the wider one in this case. */ + first_ei->source_mode = + (first_ei->source_mode > second_ei->source_mode_unsigned) ? + first_ei->source_mode : second_ei->source_mode_unsigned; + return false; + default: + gcc_unreachable (); + } + case EXTENDED_DEF: + if ((first_ei->source_mode_signed != MAX_MACHINE_MODE) + && (first_ei->source_mode_unsigned != MAX_MACHINE_MODE)) + { + switch (second_ei->relevancy) + { + case SIGN_EXTENDED_DEF: + first_ei->relevancy = SIGN_EXTENDED_DEF; + first_ei->source_mode = + (first_ei->source_mode_signed > second_ei->source_mode) ? + first_ei->source_mode_signed : second_ei->source_mode; + return false; + case RELEVANT_USE: + return false; + case ZERO_EXTENDED_DEF: + first_ei->relevancy = ZERO_EXTENDED_DEF; + first_ei->source_mode = + (first_ei->source_mode_unsigned > second_ei->source_mode) ? + first_ei->source_mode_unsigned : second_ei->source_mode; + return false; + case EXTENDED_DEF: + if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE) + first_ei->source_mode_unsigned = + (first_ei->source_mode_unsigned > + second_ei->source_mode_unsigned) ? + first_ei->source_mode_unsigned : + second_ei->source_mode_unsigned; + if (second_ei->source_mode_signed != MAX_MACHINE_MODE) + first_ei->source_mode_signed = + (first_ei->source_mode_signed > + second_ei->source_mode_signed) ? + first_ei->source_mode_signed : second_ei->source_mode_signed; + return false; + default: + gcc_unreachable (); + } + } + else if (first_ei->source_mode_signed == MAX_MACHINE_MODE) + { + gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE); + switch (second_ei->relevancy) + { + case SIGN_EXTENDED_DEF: + first_ei->relevancy = NOT_RELEVANT; + return false; + case RELEVANT_USE: + return false; + case ZERO_EXTENDED_DEF: + first_ei->relevancy = ZERO_EXTENDED_DEF; + first_ei->source_mode = + (first_ei->source_mode_unsigned > second_ei->source_mode) ? + first_ei->source_mode_unsigned : second_ei->source_mode; + return false; + case EXTENDED_DEF: + if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE) + first_ei->relevancy = NOT_RELEVANT; + else + first_ei->source_mode_unsigned = + (first_ei->source_mode_unsigned > + second_ei->source_mode_unsigned) ? + first_ei->source_mode_unsigned : + second_ei->source_mode_unsigned; + return false; + default: + gcc_unreachable (); + } + } + else + { + gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE); + gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE); + switch (second_ei->relevancy) + { + case SIGN_EXTENDED_DEF: + first_ei->relevancy = SIGN_EXTENDED_DEF; + first_ei->source_mode = + (first_ei->source_mode_signed > second_ei->source_mode) ? + first_ei->source_mode_signed : second_ei->source_mode; + return false; + case RELEVANT_USE: + return false; + case ZERO_EXTENDED_DEF: + first_ei->relevancy = NOT_RELEVANT; + return false; + case EXTENDED_DEF: + if (second_ei->source_mode_signed == MAX_MACHINE_MODE) + first_ei->relevancy = NOT_RELEVANT; + else + first_ei->source_mode_signed = + (first_ei->source_mode_signed > + second_ei->source_mode_signed) ? + first_ei->source_mode_signed : second_ei->source_mode_signed; + return false; + default: + gcc_unreachable (); + } + } + default: + /* Unknown patern type. */ + gcc_unreachable (); + } +} + + +/* Free global data structures. */ + +static void +see_free_data_structures (void) +{ + int i; + unsigned int j; + + /* Free the bitmap vectors. */ + if (transp) + { + sbitmap_vector_free (transp); + transp = NULL; + sbitmap_vector_free (comp); + comp = NULL; + sbitmap_vector_free (antloc); + antloc = NULL; + sbitmap_vector_free (ae_kill); + ae_kill = NULL; + } + if (pre_insert_map) + { + sbitmap_vector_free (pre_insert_map); + pre_insert_map = NULL; + } + if (pre_delete_map) + { + sbitmap_vector_free (pre_delete_map); + pre_delete_map = NULL; + } + if (edge_list) + { + free_edge_list (edge_list); + edge_list = NULL; + } + + /* Free the extension hash. */ + htab_delete (see_pre_extension_hash); + + /* Free the array of hashes. */ + for (i = 0; i < last_bb; i++) + if (see_bb_hash_ar[i]) + htab_delete (see_bb_hash_ar[i]); + free (see_bb_hash_ar); + + /* Free the array of splay trees. */ + for (i = 0; i < last_bb; i++) + if (see_bb_splay_ar[i]) + splay_tree_delete (see_bb_splay_ar[i]); + free (see_bb_splay_ar); + + /* Free the array of web entries and their extra info field. */ + for (j = 0; j < defs_num; j++) + free (def_entry[j].extra_info); + free (def_entry); + for (j = 0; j < uses_num; j++) + free (use_entry[j].extra_info); + free (use_entry); +} + + +/* Initialize global data structures and variables. */ + +static void +see_initialize_data_structures (void) +{ + /* Build the df object. */ + df = df_init (DF_HARD_REGS | DF_EQUIV_NOTES | DF_SUBREGS); + df_rd_add_problem (df); + /* df_ru_add_problem (df); */ + df_chain_add_problem (df, DF_DU_CHAIN | DF_UD_CHAIN); + df_analyze (df); + + if (dump_file) + df_dump (df, dump_file); + + /* Record the last basic block at the beginning of the optimization. */ + last_bb = last_basic_block; + /* Record the number of uses at the beginning of the optimization. */ + uses_num = DF_USES_SIZE (df); + /* Record the number of definitions at the beginning of the optimization. */ + defs_num = DF_DEFS_SIZE (df); + + /* Allocate web entries array for the union-find data structure. */ + def_entry = xcalloc (defs_num, sizeof (struct web_entry)); + use_entry = xcalloc (uses_num, sizeof (struct web_entry)); + + /* Allocate an array of splay trees. + One splay tree for each basic block. */ + see_bb_splay_ar = xcalloc (last_bb, sizeof (splay_tree)); + + /* Allocate an array of hashes. + One hash for each basic block. */ + see_bb_hash_ar = xcalloc (last_bb, sizeof (htab_t)); + + /* Allocate the extension hash. It will hold the extensions that we want + to PRE. */ + see_pre_extension_hash = + htab_create (10, hash_descriptor_pre_extension, eq_descriptor_pre_extension, + hash_del_pre_extension); +} + + +/* Function called by note_uses to check if a register is used in a + subexpressions. + + X is a pointer to the subexpression and DATA is a pointer to a + see_mentioned_reg_data structure that contains the register to look for and + a place for the result. */ + +static void +see_mentioned_reg (rtx *x, void *data) +{ + struct see_mentioned_reg_data *d + = (struct see_mentioned_reg_data *) data; + + if (reg_mentioned_p (d->reg, *x)) + d->mentioned = true; +} + + +/* We don't want to merge a use extension with a reference if the extended + register is used only in a simple move instruction. We also don't want to + merge a def extension with a reference if the source register of the + extension is defined only in a simple move in the reference. + + REF is the reference instruction. + EXTENSION is the use extension or def extension instruction. + TYPE is the type of the extension (use or def). + + Return true if the reference is complicated enough, so we would like to merge + it with the extension. Otherwise, return false. */ + +static bool +see_want_to_be_merged_with_extension (rtx ref, rtx extension, + enum extension_type type) +{ + rtx pat = NULL; + rtx dest_extension_reg = see_get_extension_reg (extension, 1); + rtx source_extension_reg = see_get_extension_reg (extension, 0); + enum rtx_code code; + struct see_mentioned_reg_data d; + int i; + + pat = PATTERN (ref); + code = GET_CODE (pat); + + if (code == PARALLEL) + { + for (i = 0; i < XVECLEN (pat, 0); i++) + { + rtx sub = XVECEXP (pat, 0, i); + + if ((GET_CODE (sub) == SET) + && (REG_P (SET_DEST (sub)) + || ((GET_CODE (SET_DEST (sub)) == SUBREG) + && (REG_P (SUBREG_REG (SET_DEST (sub)))))) + && (REG_P (SET_SRC (sub)) + || ((GET_CODE (SET_SRC (sub)) == SUBREG) + && (REG_P (SUBREG_REG (SET_SRC (sub))))))) + { + /* This is a simple move SET. */ + if ((type == DEF_EXTENSION) + && reg_mentioned_p (source_extension_reg, SET_DEST (sub))) + return false; + } + else + { + /* This is not a simple move SET. + Check if it uses the source of the extension. */ + if (type == USE_EXTENSION) + { + d.reg = dest_extension_reg; + d.mentioned = false; + note_uses (&sub, see_mentioned_reg, &d); + if (d.mentioned) + return true; + } + } + } + if (type == USE_EXTENSION) + return false; + } + else + { + if ((code == SET) + && (REG_P (SET_DEST (pat)) + || ((GET_CODE (SET_DEST (pat)) == SUBREG) + && (REG_P (SUBREG_REG (SET_DEST (pat)))))) + && (REG_P (SET_SRC (pat)) + || ((GET_CODE (SET_SRC (pat)) == SUBREG) + && (REG_P (SUBREG_REG (SET_SRC (pat))))))) + /* This is a simple move SET. */ + return false; + } + + return true; +} + + +/* Print the register number of the current see_register_properties + structure. + + This is a subroutine of see_main called via htab_traverse. + SLOT contains the current see_register_properties structure pointer. */ + +static int +see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED) +{ + struct see_register_properties *prop = *slot; + + gcc_assert (prop); + fprintf (dump_file, "Property found for register %d\n", prop->regno); + return 1; +} + + +/* Print the extension instruction of the current see_register_properties + structure. + + This is a subroutine of see_main called via htab_traverse. + SLOT contains the current see_pre_extension_expr structure pointer. */ + +static int +see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED) +{ + struct see_pre_extension_expr *pre_extension = *slot; + + gcc_assert (pre_extension + && pre_extension->se_insn + && INSN_P (pre_extension->se_insn)); + + fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index); + print_rtl_single (dump_file, pre_extension->se_insn); + + return 1; +} + + +/* Phase 4 implementation: Commit changes to the insn stream. */ + +/* Delete the merged def extension. + + This is a subroutine of see_commit_ref_changes called via htab_traverse. + + SLOT contains the current def extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED) +{ + rtx def_se = *slot; + + if (dump_file) + { + fprintf (dump_file, "Deleting merged def extension:\n"); + print_rtl_single (dump_file, def_se); + } + + if (INSN_DELETED_P (def_se)) + /* This def extension is an implicit one. No need to delete it since + it is not in the insn stream. */ + return 1; + + delete_insn (def_se); + return 1; +} + + +/* Delete the unmerged def extension. + + This is a subroutine of see_commit_ref_changes called via htab_traverse. + + SLOT contains the current def extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED) +{ + rtx def_se = *slot; + + if (dump_file) + { + fprintf (dump_file, "Deleting unmerged def extension:\n"); + print_rtl_single (dump_file, def_se); + } + + delete_insn (def_se); + return 1; +} + + +/* Emit the non-redundant use extension to the instruction stream. + + This is a subroutine of see_commit_ref_changes called via htab_traverse. + + SLOT contains the current use extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_emit_use_extension (void **slot, void *b) +{ + rtx use_se = *slot; + struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; + + if (INSN_DELETED_P (use_se)) + /* This use extension was previously removed according to the lcm + output. */ + return 1; + + if (dump_file) + { + fprintf (dump_file, "Inserting use extension:\n"); + print_rtl_single (dump_file, use_se); + } + + add_insn_before (use_se, curr_ref_s->insn); + + return 1; +} + + +/* For each relevant reference: + a. Emit the non-redundant use extensions. + b. Delete the def extensions. + c. Replace the original reference with the merged one (if exists) and add the + move instructions that were generated. + + This is a subroutine of see_commit_changes called via splay_tree_foreach. + + STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a + see_ref_s structure. */ + +static int +see_commit_ref_changes (splay_tree_node stn, + void *data ATTRIBUTE_UNUSED) +{ + htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; + htab_t unmerged_def_se_hash = + ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; + htab_t merged_def_se_hash = + ((struct see_ref_s *) (stn->value))->merged_def_se_hash; + rtx ref = ((struct see_ref_s *) (stn->value))->insn; + rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn; + + /* Emit the non-redundant use extensions. */ + if (use_se_hash) + htab_traverse_noresize (use_se_hash, see_emit_use_extension, + (PTR) (stn->value)); + + /* Delete the def extensions. */ + if (unmerged_def_se_hash) + htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension, + (PTR) (stn->value)); + + if (merged_def_se_hash) + htab_traverse (merged_def_se_hash, see_delete_merged_def_extension, + (PTR) (stn->value)); + + /* Replace the original reference with the merged one (if exists) and add the + move instructions that were generated. */ + if (merged_ref && !INSN_DELETED_P (ref)) + { + if (dump_file) + { + fprintf (dump_file, "Replacing orig reference:\n"); + print_rtl_single (dump_file, ref); + fprintf (dump_file, "With merged reference:\n"); + print_rtl_single (dump_file, merged_ref); + } + emit_insn_after (merged_ref, ref); + delete_insn (ref); + } + + /* Continue to the next reference. */ + return 0; +} + + +/* Insert partially redundant expressions on edges to make the expressions fully + redundant. + + INDEX_MAP is a mapping of an index to an expression. + Return true if an instruction was insertedon an edge. + Otherwise, return false. */ + +static bool +see_pre_insert_extensions (struct see_pre_extension_expr **index_map) +{ + int num_edges = NUM_EDGES (edge_list); + int set_size = pre_insert_map[0]->size; + size_t pre_extension_num = htab_elements (see_pre_extension_hash); + + int did_insert = 0; + int e; + int i; + int j; + + for (e = 0; e < num_edges; e++) + { + int indx; + basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e); + + for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS) + { + SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i]; + + for (j = indx; insert && j < (int) pre_extension_num; + j++, insert >>= 1) + if (insert & 1) + { + struct see_pre_extension_expr *expr = index_map[j]; + int idx = expr->bitmap_index; + rtx se_insn = NULL; + edge eg = INDEX_EDGE (edge_list, e); + + start_sequence (); + emit_insn (PATTERN (expr->se_insn)); + se_insn = get_insns (); + end_sequence (); + + if (eg->flags & EDGE_ABNORMAL) + { + rtx new_insn = NULL; + + new_insn = insert_insn_end_bb_new (se_insn, bb); + gcc_assert (new_insn && INSN_P (new_insn)); + + if (dump_file) + { + fprintf (dump_file, + "PRE: end of bb %d, insn %d, ", + bb->index, INSN_UID (new_insn)); + fprintf (dump_file, + "inserting expression %d\n", idx); + } + } + else + { + insert_insn_on_edge (se_insn, eg); + + if (dump_file) + { + fprintf (dump_file, "PRE: edge (%d,%d), ", + bb->index, + INDEX_EDGE_SUCC_BB (edge_list, e)->index); + fprintf (dump_file, "inserting expression %d\n", idx); + } + } + did_insert = true; + } + } + } + return did_insert; +} + + +/* Since all the redundant extensions must be anticipatable, they must be a use + extensions. Mark them as deleted. This will prevent them from been emitted + in the first place. + + This is a subroutine of see_commit_changes called via htab_traverse. + + SLOT contains the current see_pre_extension_expr structure pointer. */ + +static int +see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED) +{ + struct see_pre_extension_expr *expr = *slot; + struct see_occr *occr; + int indx = expr->bitmap_index; + + for (occr = expr->antic_occr; occr != NULL; occr = occr->next) + { + if (TEST_BIT (pre_delete_map[occr->block_num], indx)) + { + /* Mark as deleted. */ + INSN_DELETED_P (occr->insn) = 1; + if (dump_file) + { + fprintf (dump_file,"Redundant extension deleted:\n"); + print_rtl_single (dump_file, occr->insn); + } + } + } + return 1; +} + + +/* Create the index_map mapping of an index to an expression. + + This is a subroutine of see_commit_changes called via htab_traverse. + + SLOT contains the current see_pre_extension_expr structure pointer. + B a pointer to see_pre_extension_expr structure pointer. */ + +static int +see_map_extension (void **slot, void *b) +{ + struct see_pre_extension_expr *expr = *slot; + struct see_pre_extension_expr **index_map = + (struct see_pre_extension_expr **) b; + + index_map[expr->bitmap_index] = expr; + + return 1; +} + + +/* Phase 4 top level function. + In this phase we finally change the instruction stream. + Here we insert extensions at their best placements and delete the + redundant ones according to the output of the LCM. We also replace + some of the instructions according to phase 2 merges results. */ + +static void +see_commit_changes (void) +{ + struct see_pre_extension_expr **index_map; + size_t pre_extension_num = htab_elements (see_pre_extension_hash); + bool did_insert = false; + int i; + + index_map = xcalloc (pre_extension_num, + sizeof (struct see_pre_extension_expr *)); + + if (dump_file) + fprintf (dump_file, + "* Phase 4: Commit changes to the insn stream. *\n"); + + /* Produce a mapping of all the pre_extensions. */ + htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map); + + /* Delete redundant extension. This will prevent them from been emitted in + the first place. */ + htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL); + + /* At this point, we must free the DF object, since the number of basic blocks + may change. */ + df_finish (df); + df = NULL; + + /* Insert extensions on edges, according to the LCM result. */ + did_insert = see_pre_insert_extensions (index_map); + + if (did_insert) + commit_edge_insertions (); + + /* Commit the rest of the changes. */ + for (i = 0; i < last_bb; i++) + { + if (see_bb_splay_ar[i]) + { + /* Traverse over all the references in the basic block in forward + order. */ + splay_tree_foreach (see_bb_splay_ar[i], + see_commit_ref_changes, NULL); + } + } + + free (index_map); +} + + +/* Phase 3 implementation: Eliminate globally redundant extensions. */ + +/* Analyze the properties of a merged def extension for the LCM and record avail + occurrences. + + This is a subroutine of see_analyze_ref_local_prop called + via htab_traverse. + + SLOT contains the current def extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_analyze_merged_def_local_prop (void **slot, void *b) +{ + rtx def_se = *slot; + struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; + rtx ref = curr_ref_s->insn; + struct see_pre_extension_expr *extension_expr = NULL; + int indx; + int bb_num = BLOCK_NUM (ref); + htab_t curr_bb_hash = NULL; + struct see_register_properties *curr_prop = NULL; + struct see_register_properties **slot_prop = NULL; + struct see_register_properties temp_prop; + rtx dest_extension_reg = see_get_extension_reg (def_se, 1); + struct see_occr *curr_occr = NULL; + struct see_occr *tmp_occr = NULL; + + extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION); + /* The extension_expr must be found. */ + gcc_assert (extension_expr); + + curr_bb_hash = see_bb_hash_ar[bb_num]; + gcc_assert (curr_bb_hash); + temp_prop.regno = REGNO (dest_extension_reg); + slot_prop = + (struct see_register_properties **) htab_find_slot (curr_bb_hash, + &temp_prop, INSERT); + curr_prop = *slot_prop; + gcc_assert (curr_prop); + + indx = extension_expr->bitmap_index; + + /* Reset the transparency bit. */ + RESET_BIT (transp[bb_num], indx); + /* Reset the killed bit. */ + RESET_BIT (ae_kill[bb_num], indx); + + if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref)) + { + /* Set the available bit. */ + SET_BIT (comp[bb_num], indx); + /* Record the available occurrence. */ + curr_occr = xmalloc (sizeof (struct see_occr)); + curr_occr->next = NULL; + curr_occr->insn = def_se; + curr_occr->block_num = bb_num; + tmp_occr = extension_expr->avail_occr; + if (!tmp_occr) + extension_expr->avail_occr = curr_occr; + else + { + while (tmp_occr->next) + tmp_occr = tmp_occr->next; + tmp_occr->next = curr_occr; + } + } + + return 1; +} + + +/* Analyze the properties of a unmerged def extension for the LCM. + + This is a subroutine of see_analyze_ref_local_prop called + via htab_traverse. + + SLOT contains the current def extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_analyze_unmerged_def_local_prop (void **slot, void *b) +{ + rtx def_se = *slot; + struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; + rtx ref = curr_ref_s->insn; + struct see_pre_extension_expr *extension_expr = NULL; + int indx; + int bb_num = BLOCK_NUM (ref); + htab_t curr_bb_hash = NULL; + struct see_register_properties *curr_prop = NULL; + struct see_register_properties **slot_prop = NULL; + struct see_register_properties temp_prop; + rtx dest_extension_reg = see_get_extension_reg (def_se, 1); + + extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION); + /* The extension_expr must be found. */ + gcc_assert (extension_expr); + + curr_bb_hash = see_bb_hash_ar[bb_num]; + gcc_assert (curr_bb_hash); + temp_prop.regno = REGNO (dest_extension_reg); + slot_prop = + (struct see_register_properties **) htab_find_slot (curr_bb_hash, + &temp_prop, INSERT); + curr_prop = *slot_prop; + gcc_assert (curr_prop); + + indx = extension_expr->bitmap_index; + + /* Reset the transparency bit. */ + RESET_BIT (transp[bb_num], indx); + /* Set the killed bit. */ + SET_BIT (ae_kill[bb_num], indx); + + return 1; +} + + +/* Analyze the properties of a use extension for the LCM and record anic and + avail occurrences. + + This is a subroutine of see_analyze_ref_local_prop called + via htab_traverse. + + SLOT contains the current use extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_analyze_use_local_prop (void **slot, void *b) +{ + struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; + rtx use_se = *slot; + rtx ref = curr_ref_s->insn; + rtx dest_extension_reg = see_get_extension_reg (use_se, 1); + struct see_pre_extension_expr *extension_expr = NULL; + struct see_register_properties *curr_prop = NULL; + struct see_register_properties **slot_prop = NULL; + struct see_register_properties temp_prop; + struct see_occr *curr_occr = NULL; + struct see_occr *tmp_occr = NULL; + htab_t curr_bb_hash = NULL; + int indx; + int bb_num = BLOCK_NUM (ref); + + extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION); + /* The extension_expr must be found. */ + gcc_assert (extension_expr); + + curr_bb_hash = see_bb_hash_ar[bb_num]; + gcc_assert (curr_bb_hash); + temp_prop.regno = REGNO (dest_extension_reg); + slot_prop = + (struct see_register_properties **) htab_find_slot (curr_bb_hash, + &temp_prop, INSERT); + curr_prop = *slot_prop; + gcc_assert (curr_prop); + + indx = extension_expr->bitmap_index; + + if (curr_prop->first_se_before_any_def == DF_INSN_LUID (df, ref)) + { + /* Set the anticipatable bit. */ + SET_BIT (antloc[bb_num], indx); + /* Record the anticipatable occurrence. */ + curr_occr = xmalloc (sizeof (struct see_occr)); + curr_occr->next = NULL; + curr_occr->insn = use_se; + curr_occr->block_num = bb_num; + tmp_occr = extension_expr->antic_occr; + if (!tmp_occr) + extension_expr->antic_occr = curr_occr; + else + { + while (tmp_occr->next) + tmp_occr = tmp_occr->next; + tmp_occr->next = curr_occr; + } + if (curr_prop->last_def < 0) + { + /* Set the available bit. */ + SET_BIT (comp[bb_num], indx); + /* Record the available occurrence. */ + curr_occr = xmalloc (sizeof (struct see_occr)); + curr_occr->next = NULL; + curr_occr->insn = use_se; + curr_occr->block_num = bb_num; + tmp_occr = extension_expr->avail_occr; + if (!tmp_occr) + extension_expr->avail_occr = curr_occr; + else + { + while (tmp_occr->next) + tmp_occr = tmp_occr->next; + tmp_occr->next = curr_occr; + } + } + /* Note: there is no need to reset the killed bit since it must be zero at + this point. */ + } + else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref)) + { + /* Set the available bit. */ + SET_BIT (comp[bb_num], indx); + /* Reset the killed bit. */ + RESET_BIT (ae_kill[bb_num], indx); + /* Record the available occurrence. */ + curr_occr = xmalloc (sizeof (struct see_occr)); + curr_occr->next = NULL; + curr_occr->insn = use_se; + curr_occr->block_num = bb_num; + tmp_occr = extension_expr->avail_occr; + if (!tmp_occr) + extension_expr->avail_occr = curr_occr; + else + { + while (tmp_occr->next) + tmp_occr = tmp_occr->next; + tmp_occr->next = curr_occr; + } + } + return 1; +} + + +/* Here we traverse over all the merged and unmerged extensions of the reference + and analyze their properties for the LCM. + + This is a subroutine of see_execute_LCM called via splay_tree_foreach. + + STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a + see_ref_s structure. */ + +static int +see_analyze_ref_local_prop (splay_tree_node stn, + void *data ATTRIBUTE_UNUSED) +{ + htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; + htab_t unmerged_def_se_hash = + ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; + htab_t merged_def_se_hash = + ((struct see_ref_s *) (stn->value))->merged_def_se_hash; + + /* Analyze use extensions that were not merged with the reference. */ + if (use_se_hash) + htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop, + (PTR) (stn->value)); + + /* Analyze def extensions that were not merged with the reference. */ + if (unmerged_def_se_hash) + htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop, + (PTR) (stn->value)); + + /* Analyze def extensions that were merged with the reference. */ + if (merged_def_se_hash) + htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop, + (PTR) (stn->value)); + + /* Continue to the next definition. */ + return 0; +} + + +/* Phase 3 top level function. + In this phase, we set the input bit vectors of the LCM according to data + gathered in phase 2. + Then we run the edge based LCM. */ + +static void +see_execute_LCM (void) +{ + size_t pre_extension_num = htab_elements (see_pre_extension_hash); + int i = 0; + + if (dump_file) + fprintf (dump_file, + "* Phase 3: Eliminate globally redundant extensions. *\n"); + + /* Initialize the global sbitmap vectors. */ + transp = sbitmap_vector_alloc (last_bb, pre_extension_num); + comp = sbitmap_vector_alloc (last_bb, pre_extension_num); + antloc = sbitmap_vector_alloc (last_bb, pre_extension_num); + ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num); + sbitmap_vector_ones (transp, last_bb); + sbitmap_vector_zero (comp, last_bb); + sbitmap_vector_zero (antloc, last_bb); + sbitmap_vector_zero (ae_kill, last_bb); + + /* Traverse over all the splay trees of the basic blocks. */ + for (i = 0; i < last_bb; i++) + { + if (see_bb_splay_ar[i]) + { + /* Traverse over all the references in the basic block in forward + order. */ + splay_tree_foreach (see_bb_splay_ar[i], + see_analyze_ref_local_prop, NULL); + } + } + + /* Add fake exit edges before running the lcm. */ + add_noreturn_fake_exit_edges (); + + /* Run the LCM. */ + edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc, + ae_kill, &pre_insert_map, &pre_delete_map); + + /* Remove the fake edges. */ + remove_fake_exit_edges (); +} + + +/* Phase 2 implementation: Merge and eliminate locally redundant extensions. */ + +/* In this function we set the register properties for the register that is + defined and extended in the reference. + The properties are defined in see_register_properties structure which is + allocated per basic bloack and per register. + Later the extension is inserted into the see_pre_extension_hash for the next + phase of the optimization. + + This is a subroutine of see_handle_extensions_for_one_ref called + via htab_traverse. + + SLOT contains the current def extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_set_prop_merged_def (void **slot, void *b) +{ + rtx def_se = *slot; + struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; + rtx insn = curr_ref_s->insn; + rtx dest_extension_reg = see_get_extension_reg (def_se, 1); + htab_t curr_bb_hash = NULL; + struct see_register_properties *curr_prop = NULL; + struct see_register_properties **slot_prop = NULL; + struct see_register_properties temp_prop; + int ref_luid = DF_INSN_LUID (df, insn); + + curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)]; + if (!curr_bb_hash) + { + /* The hash doesn't exist yet. Create it. */ + curr_bb_hash = + htab_create (10, hash_descriptor_properties, eq_descriptor_properties, + hash_del_properties); + see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash; + } + + /* Find the right register properties in the right basic block. */ + temp_prop.regno = REGNO (dest_extension_reg); + slot_prop = + (struct see_register_properties **) htab_find_slot (curr_bb_hash, + &temp_prop, INSERT); + + if (slot_prop && (*slot_prop != NULL)) + { + /* Property already exists. */ + curr_prop = *slot_prop; + gcc_assert (curr_prop->regno == REGNO (dest_extension_reg)); + + curr_prop->last_def = ref_luid; + curr_prop->first_se_after_last_def = ref_luid; + } + else + { + /* Property doesn't exist yet. */ + curr_prop = xmalloc (sizeof (struct see_register_properties)); + curr_prop->regno = REGNO (dest_extension_reg); + curr_prop->last_def = ref_luid; + curr_prop->first_se_before_any_def = -1; + curr_prop->first_se_after_last_def = ref_luid; + *slot_prop = curr_prop; + } + + /* Insert the def_se into see_pre_extension_hash if it isn't already + there. */ + see_seek_pre_extension_expr (def_se, DEF_EXTENSION); + + return 1; +} + + +/* In this function we set the register properties for the register that is + defined but not extended in the reference. + The properties are defined in see_register_properties structure which is + allocated per basic bloack and per register. + Later the extension is inserted into the see_pre_extension_hash for the next + phase of the optimization. + + This is a subroutine of see_handle_extensions_for_one_ref called + via htab_traverse. + + SLOT contains the current def extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_set_prop_unmerged_def (void **slot, void *b) +{ + rtx def_se = *slot; + struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; + rtx insn = curr_ref_s->insn; + rtx dest_extension_reg = see_get_extension_reg (def_se, 1); + htab_t curr_bb_hash = NULL; + struct see_register_properties *curr_prop = NULL; + struct see_register_properties **slot_prop = NULL; + struct see_register_properties temp_prop; + int ref_luid = DF_INSN_LUID (df, insn); + + curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)]; + if (!curr_bb_hash) + { + /* The hash doesn't exist yet. Create it. */ + curr_bb_hash = + htab_create (10, hash_descriptor_properties, eq_descriptor_properties, + hash_del_properties); + see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash; + } + + /* Find the right register properties in the right basic block. */ + temp_prop.regno = REGNO (dest_extension_reg); + slot_prop = + (struct see_register_properties **) htab_find_slot (curr_bb_hash, + &temp_prop, INSERT); + + if (slot_prop && (*slot_prop != NULL)) + { + /* Property already exists. */ + curr_prop = *slot_prop; + gcc_assert (curr_prop->regno == REGNO (dest_extension_reg)); + + curr_prop->last_def = ref_luid; + curr_prop->first_se_after_last_def = -1; + } + else + { + /* Property doesn't exist yet. */ + curr_prop = xmalloc (sizeof (struct see_register_properties)); + curr_prop->regno = REGNO (dest_extension_reg); + curr_prop->last_def = ref_luid; + curr_prop->first_se_before_any_def = -1; + curr_prop->first_se_after_last_def = -1; + *slot_prop = curr_prop; + } + + /* Insert the def_se into see_pre_extension_hash if it isn't already + there. */ + see_seek_pre_extension_expr (def_se, DEF_EXTENSION); + + return 1; +} + + +/* In this function we set the register properties for the register that is used + in the reference. + The properties are defined in see_register_properties structure which is + allocated per basic bloack and per register. + When a redundant use extension is found it is removed from the hash of the + reference. + If the extension is non redundant it is inserted into the + see_pre_extension_hash for the next phase of the optimization. + + This is a subroutine of see_handle_extensions_for_one_ref called + via htab_traverse. + + SLOT contains the current use extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_set_prop_unmerged_use (void **slot, void *b) +{ + rtx use_se = *slot; + struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; + rtx insn = curr_ref_s->insn; + rtx dest_extension_reg = see_get_extension_reg (use_se, 1); + htab_t curr_bb_hash = NULL; + struct see_register_properties *curr_prop = NULL; + struct see_register_properties **slot_prop = NULL; + struct see_register_properties temp_prop; + bool locally_redundant = false; + int ref_luid = DF_INSN_LUID (df, insn); + + curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)]; + if (!curr_bb_hash) + { + /* The hash doesn't exist yet. Create it. */ + curr_bb_hash = + htab_create (10, hash_descriptor_properties, eq_descriptor_properties, + hash_del_properties); + see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash; + } + + /* Find the right register properties in the right basic block. */ + temp_prop.regno = REGNO (dest_extension_reg); + slot_prop = + (struct see_register_properties **) htab_find_slot (curr_bb_hash, + &temp_prop, INSERT); + + if (slot_prop && (*slot_prop != NULL)) + { + /* Property already exists. */ + curr_prop = *slot_prop; + gcc_assert (curr_prop->regno == REGNO (dest_extension_reg)); + + + if ((curr_prop->last_def < 0) && (curr_prop->first_se_before_any_def < 0)) + curr_prop->first_se_before_any_def = ref_luid; + else if ((curr_prop->last_def < 0) + && (curr_prop->first_se_before_any_def >= 0)) + { + /* In this case the extension is localy redundant. */ + htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); + locally_redundant = true; + } + else if ((curr_prop->last_def >= 0) + && (curr_prop->first_se_after_last_def < 0)) + curr_prop->first_se_after_last_def = ref_luid; + else if ((curr_prop->last_def >= 0) + && (curr_prop->first_se_after_last_def >= 0)) + { + /* In this case the extension is localy redundant. */ + htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); + locally_redundant = true; + } + else + gcc_unreachable (); + } + else + { + /* Property doesn't exist yet. Create a new one. */ + curr_prop = xmalloc (sizeof (struct see_register_properties)); + curr_prop->regno = REGNO (dest_extension_reg); + curr_prop->last_def = -1; + curr_prop->first_se_before_any_def = ref_luid; + curr_prop->first_se_after_last_def = -1; + *slot_prop = curr_prop; + } + + /* Insert the use_se into see_pre_extension_hash if it isn't already + there. */ + if (!locally_redundant) + see_seek_pre_extension_expr (use_se, USE_EXTENSION); + if (locally_redundant && dump_file) + { + fprintf (dump_file, "Locally redundant extension:\n"); + print_rtl_single (dump_file, use_se); + } + return 1; +} + + +/* Print an extension instruction. + + This is a subroutine of see_handle_extensions_for_one_ref called + via htab_traverse. + SLOT contains the extension instruction. */ + +static int +see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED) +{ + rtx def_se = *slot; + + gcc_assert (def_se && INSN_P (def_se)); + print_rtl_single (dump_file, def_se); + + return 1; +} + +/* Function called by note_uses to replace used subexpressions. + + X is a pointer to the subexpression and DATA is a pointer to a + see_replace_data structure that contains the data for the replacement. */ + +static void +see_replace_src (rtx *x, void *data) +{ + struct see_replace_data *d + = (struct see_replace_data *) data; + + *x = replace_rtx (*x, d->from, d->to); +} + + +/* At this point the pattern is expected to be: + + ref: set (dest_reg) (rhs) + def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg)) + + The merge of these two instructions didn't succeed. + + We try to generate the pattern: + set (subreg (dest_extension_reg)) (rhs) + + We do this in 4 steps: + a. Replace every use of dest_reg with a new preudo register. + b. Replace every instance of dest_reg with the subreg. + c. Replace every use of the new pseudo register back to dest_reg. + d. Try to recognize and simplify. + + If the manipulation failed, leave the original ref but try to generate and + recognize a simple move instruction: + set (subreg (dest_extension_reg)) (dest_reg) + This move instruction will be emitted right after the ref to the instruction + stream and assure the correctness of the code after def_se will be removed. + + CURR_REF_S is the current reference. + DEF_SE is the extension that couldn't be merged. */ + +static void +see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se) +{ + struct see_replace_data d; + /* If the original insn was already merged with an extension before, + take the merged one. */ + rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn : + curr_ref_s->insn; + rtx merged_ref_next = (curr_ref_s->merged_insn) ? + NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX; + rtx ref_copy = copy_rtx (ref); + rtx dest_reg = NULL; + rtx dest_real_reg = NULL; + rtx source_extension_reg = see_get_extension_reg (def_se, 0); + rtx dest_extension_reg = see_get_extension_reg (def_se, 1); + rtx new_pseudo_reg = NULL; + rtx subreg = NULL; + rtx move_insn = NULL; + rtx set = NULL; + rtx rhs = NULL; + enum machine_mode source_extension_mode = GET_MODE (source_extension_reg); + enum machine_mode dest_mode; + + set = single_set (def_se); + gcc_assert (set); + rhs = SET_SRC (set); + gcc_assert ((GET_CODE (rhs) == SIGN_EXTEND) + || (GET_CODE (rhs) == ZERO_EXTEND)); + dest_reg = XEXP (rhs, 0); + gcc_assert (REG_P (dest_reg) + || ((GET_CODE (dest_reg) == SUBREG) + && REG_P (SUBREG_REG (dest_reg)))); + dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg); + dest_mode = GET_MODE (dest_reg); + + subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg); + new_pseudo_reg = gen_reg_rtx (source_extension_mode); + + /* Step a: Replace every use of dest_real_reg with a new preudo register. */ + d.from = dest_real_reg; + d.to = new_pseudo_reg; + note_uses (&PATTERN (ref_copy), see_replace_src, &d); + /* Step b: Replace every instance of dest_reg with the subreg. */ + ref_copy = replace_rtx (ref_copy, dest_reg, subreg); + + /* Step c: Replace every use of the new pseudo register back to + dest_real_reg. */ + d.from = new_pseudo_reg; + d.to = dest_real_reg; + note_uses (&PATTERN (ref_copy), see_replace_src, &d); + + if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy)) + || insn_invalid_p (ref_copy)) + { + /* The manipulation failed. */ + + /* Create a new copy. */ + ref_copy = copy_rtx (ref); + + /* Create a simple move instruction that will replace the def_se. */ + start_sequence (); + emit_move_insn (subreg, dest_reg); + move_insn = get_insns (); + end_sequence (); + + /* Link the manipulated instruction to the newly created move instruction + and to the former created move instructions. */ + PREV_INSN (ref_copy) = NULL_RTX; + NEXT_INSN (ref_copy) = move_insn; + PREV_INSN (move_insn) = ref_copy; + NEXT_INSN (move_insn) = merged_ref_next; + if (merged_ref_next != NULL_RTX) + PREV_INSN (merged_ref_next) = move_insn; + curr_ref_s->merged_insn = ref_copy; + + if (dump_file) + { + fprintf (dump_file, "Following def merge failure a move "); + fprintf (dump_file, "insn was added after the ref.\n"); + fprintf (dump_file, "Original ref:\n"); + print_rtl_single (dump_file, ref); + fprintf (dump_file, "Move insn that was added:\n"); + print_rtl_single (dump_file, move_insn); + } + return; + } + + /* The manipulation succeeded. Store the new manipulated reference. */ + + /* Try to simplify the new manipulated insn. */ + validate_simplify_insn (ref_copy); + + /* Create a simple move instruction to assure the correctness of the code. */ + start_sequence (); + emit_move_insn (dest_reg, subreg); + move_insn = get_insns (); + end_sequence (); + + /* Link the manipulated instruction to the newly created move instruction and + to the former created move instructions. */ + PREV_INSN (ref_copy) = NULL_RTX; + NEXT_INSN (ref_copy) = move_insn; + PREV_INSN (move_insn) = ref_copy; + NEXT_INSN (move_insn) = merged_ref_next; + if (merged_ref_next != NULL_RTX) + PREV_INSN (merged_ref_next) = move_insn; + curr_ref_s->merged_insn = ref_copy; + + if (dump_file) + { + fprintf (dump_file, "Following merge failure the ref was transformed!\n"); + fprintf (dump_file, "Original ref:\n"); + print_rtl_single (dump_file, ref); + fprintf (dump_file, "Transformed ref:\n"); + print_rtl_single (dump_file, ref_copy); + fprintf (dump_file, "Move insn that was added:\n"); + print_rtl_single (dump_file, move_insn); + } +} + + +/* Merge the reference instruction (ref) with the current use extension. + + use_se extends a NARROWmode register to a WIDEmode register. + ref uses the WIDEmode register. + + The pattern we try to merge is this: + use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg)) + ref: use (dest_extension_reg) + + where dest_extension_reg and source_extension_reg can be subregs. + + The merge is done by generating, simplifying and recognizing the pattern: + use (sign/zero_extend (source_extension_reg)) + + If ref is too simple (according to see_want_to_be_merged_with_extension ()) + we don't try to merge it with use_se and we continue as if the merge failed. + + This is a subroutine of see_handle_extensions_for_one_ref called + via htab_traverse. + SLOT contains the current use extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_merge_one_use_extension (void **slot, void *b) +{ + struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; + rtx use_se = *slot; + rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn : + curr_ref_s->insn; + rtx merged_ref_next = (curr_ref_s->merged_insn) ? + NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX; + rtx ref_copy = copy_rtx (ref); + rtx extension_set = single_set (use_se); + rtx extension_rhs = NULL; + rtx dest_extension_reg = see_get_extension_reg (use_se, 1); + rtx note = NULL; + rtx simplified_note = NULL; + + gcc_assert (use_se && curr_ref_s && extension_set); + + extension_rhs = SET_SRC (extension_set); + + /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to + replace the uses of the dest_extension_reg with the rhs of the extension + instruction. This is necessary since there might not be an extension in + the path between the definition and the note when this optimization is + over. */ + note = find_reg_equal_equiv_note (ref_copy); + if (note) + { + simplified_note = simplify_replace_rtx (XEXP (note, 0), + dest_extension_reg, + extension_rhs); + if (rtx_equal_p (XEXP (note, 0), simplified_note)) + /* Replacement failed. Remove the note. */ + remove_note (ref_copy, note); + else + XEXP (note, 0) = simplified_note; + } + + if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION)) + { + /* The use in the reference is too simple. Don't try to merge. */ + if (dump_file) + { + fprintf (dump_file, "Use merge skipped!\n"); + fprintf (dump_file, "Original instructions:\n"); + print_rtl_single (dump_file, use_se); + print_rtl_single (dump_file, ref); + } + /* Don't remove the current use_se from the use_se_hash and continue to + the next extension. */ + return 1; + } + + validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy); + + if (!num_changes_pending ()) + /* In this case this is not a real use (the only use is/was in the notes + list). Remove the use extension from the hash. This will prevent it + from been emitted in the first place. */ + { + if (dump_file) + { + fprintf (dump_file, "Use extension not necessary before:\n"); + print_rtl_single (dump_file, ref); + } + htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); + PREV_INSN (ref_copy) = NULL_RTX; + NEXT_INSN (ref_copy) = merged_ref_next; + if (merged_ref_next != NULL_RTX) + PREV_INSN (merged_ref_next) = ref_copy; + curr_ref_s->merged_insn = ref_copy; + return 1; + } + + if (!apply_change_group ()) + { + /* The merge failed. */ + if (dump_file) + { + fprintf (dump_file, "Use merge failed!\n"); + fprintf (dump_file, "Original instructions:\n"); + print_rtl_single (dump_file, use_se); + print_rtl_single (dump_file, ref); + } + /* Don't remove the current use_se from the use_se_hash and continue to + the next extension. */ + return 1; + } + + /* The merge succeeded! */ + + /* Try to simplify the new merged insn. */ + validate_simplify_insn (ref_copy); + + PREV_INSN (ref_copy) = NULL_RTX; + NEXT_INSN (ref_copy) = merged_ref_next; + if (merged_ref_next != NULL_RTX) + PREV_INSN (merged_ref_next) = ref_copy; + curr_ref_s->merged_insn = ref_copy; + + if (dump_file) + { + fprintf (dump_file, "Use merge succeeded!\n"); + fprintf (dump_file, "Original instructions:\n"); + print_rtl_single (dump_file, use_se); + print_rtl_single (dump_file, ref); + fprintf (dump_file, "Merged instruction:\n"); + print_rtl_single (dump_file, ref_copy); + } + + /* Remove the current use_se from the use_se_hash. This will prevent it from + been emitted in the first place. */ + htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); + return 1; +} + + +/* Merge the reference instruction (ref) with the extension that follows it + in the same basic block (def_se). + ref sets a NARROWmode register and def_se extends it to WIDEmode register. + + The pattern we try to merge is this: + ref: set (dest_reg) (rhs) + def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg)) + + where dest_reg and source_extension_reg can both be subregs (togather) + and (REGNO (dest_reg) == REGNO (source_extension_reg)) + + The merge is done by generating, simplifying and recognizing the pattern: + set (dest_extension_reg) (sign/zero_extend (rhs)) + If ref is a parallel instruction we just replace the relevant set in it. + + If ref is too simple (according to see_want_to_be_merged_with_extension ()) + we don't try to merge it with def_se and we continue as if the merge failed. + + This is a subroutine of see_handle_extensions_for_one_ref called + via htab_traverse. + + SLOT contains the current def extension instruction. + B is the see_ref_s structure pointer. */ + +static int +see_merge_one_def_extension (void **slot, void *b) +{ + struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; + rtx def_se = *slot; + /* If the original insn was already merged with an extension before, + take the merged one. */ + rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn : + curr_ref_s->insn; + rtx merged_ref_next = (curr_ref_s->merged_insn) ? + NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX; + rtx ref_copy = copy_rtx (ref); + rtx new_set = NULL; + rtx source_extension_reg = see_get_extension_reg (def_se, 0); + rtx dest_extension_reg = see_get_extension_reg (def_se, 1); + rtx move_insn = NULL; + rtx *rtx_slot = NULL; + rtx subreg = NULL; + rtx temp_extension = NULL; + rtx simplified_temp_extension = NULL; + rtx *pat = NULL; + enum rtx_code code; + enum rtx_code extension_code; + enum machine_mode source_extension_mode; + enum machine_mode source_mode; + enum machine_mode dest_extension_mode; + bool merge_success = false; + int i; + + gcc_assert (def_se + && INSN_P (def_se) + && curr_ref_s + && ref + && INSN_P (ref)); + + if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION)) + { + /* The definition in the reference is too simple. Don't try to merge. */ + if (dump_file) + { + fprintf (dump_file, "Def merge skipped!\n"); + fprintf (dump_file, "Original instructions:\n"); + print_rtl_single (dump_file, ref); + print_rtl_single (dump_file, def_se); + } + + see_def_extension_not_merged (curr_ref_s, def_se); + /* Continue to the next extension. */ + return 1; + } + + extension_code = see_get_extension_data (def_se, &source_mode); + + /* Try to merge and simplify the extension. */ + source_extension_mode = GET_MODE (source_extension_reg); + dest_extension_mode = GET_MODE (dest_extension_reg); + + pat = &PATTERN (ref_copy); + code = GET_CODE (*pat); + + if (code == PARALLEL) + { + bool need_to_apply_change = false; + + for (i = 0; i < XVECLEN (*pat, 0); i++) + { + rtx *sub = &XVECEXP (*pat, 0, i); + + if ((GET_CODE (*sub) == SET) + && (GET_MODE (SET_SRC (*sub)) != VOIDmode) + && (GET_MODE (SET_DEST (*sub)) == source_mode) + && ((REG_P (SET_DEST (*sub)) + && (REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg))) + || ((GET_CODE (SET_DEST (*sub)) == SUBREG) + && (REG_P (SUBREG_REG (SET_DEST (*sub)))) + && (REGNO (SUBREG_REG (SET_DEST (*sub))) == + REGNO (source_extension_reg))))) + { + rtx orig_src = SET_SRC (*sub); + + if (extension_code == SIGN_EXTEND) + temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, + orig_src); + else + temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, + orig_src); + simplified_temp_extension = simplify_rtx (temp_extension); + temp_extension = + (simplified_temp_extension) ? simplified_temp_extension : + temp_extension; + new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, + temp_extension); + validate_change (ref_copy, sub, new_set, 1); + need_to_apply_change = true; + } + } + if (need_to_apply_change) + if (apply_change_group ()) + merge_success = true; + } + else if ((code == SET) + && (GET_MODE (SET_SRC (*pat)) != VOIDmode) + && (GET_MODE (SET_DEST (*pat)) == source_mode) + && ((REG_P (SET_DEST (*pat)) + && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg)) + || ((GET_CODE (SET_DEST (*pat)) == SUBREG) + && (REG_P (SUBREG_REG (SET_DEST (*pat)))) + && (REGNO (SUBREG_REG (SET_DEST (*pat))) == + REGNO (source_extension_reg))))) + { + rtx orig_src = SET_SRC (*pat); + + if (extension_code == SIGN_EXTEND) + temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src); + else + temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src); + simplified_temp_extension = simplify_rtx (temp_extension); + temp_extension = (simplified_temp_extension) ? simplified_temp_extension : + temp_extension; + new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension); + if (validate_change (ref_copy, pat, new_set, 0)) + merge_success = true; + } + if (!merge_success) + { + /* The merge failed. */ + if (dump_file) + { + fprintf (dump_file, "Def merge failed!\n"); + fprintf (dump_file, "Original instructions:\n"); + print_rtl_single (dump_file, ref); + print_rtl_single (dump_file, def_se); + } + + see_def_extension_not_merged (curr_ref_s, def_se); + /* Continue to the next extension. */ + return 1; + } + + /* The merge succeeded! */ + + /* Create a simple move instruction to assure the correctness of the code. */ + subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg); + start_sequence (); + emit_move_insn (source_extension_reg, subreg); + move_insn = get_insns (); + end_sequence (); + + /* Link the merged instruction to the newly created move instruction and + to the former created move instructions. */ + PREV_INSN (ref_copy) = NULL_RTX; + NEXT_INSN (ref_copy) = move_insn; + PREV_INSN (move_insn) = ref_copy; + NEXT_INSN (move_insn) = merged_ref_next; + if (merged_ref_next != NULL_RTX) + PREV_INSN (merged_ref_next) = move_insn; + curr_ref_s->merged_insn = ref_copy; + + if (dump_file) + { + fprintf (dump_file, "Def merge succeeded!\n"); + fprintf (dump_file, "Original instructions:\n"); + print_rtl_single (dump_file, ref); + print_rtl_single (dump_file, def_se); + fprintf (dump_file, "Merged instruction:\n"); + print_rtl_single (dump_file, ref_copy); + fprintf (dump_file, "Move instruction that was added:\n"); + print_rtl_single (dump_file, move_insn); + } + + /* Remove the current def_se from the unmerged_def_se_hash and insert it to + the merged_def_se_hash. */ + htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot); + if (!curr_ref_s->merged_def_se_hash) + curr_ref_s->merged_def_se_hash = + htab_create (10, hash_descriptor_extension, eq_descriptor_extension, + NULL); + rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash, + dest_extension_reg, INSERT); + gcc_assert (*rtx_slot == NULL); + *rtx_slot = def_se; + + return 1; +} + + +/* Try to eliminate extensions in this order: + a. Try to merge only the def extensions, one by one. + b. Try to merge only the use extensions, one by one. + + TODO: + Try to merge any couple of use extensions simultaneously. + Try to merge any def extension with one or two uses extensions + simultaneously. + + After all the merges are done, update the register properties for the basic + block and eliminate locally redundant use extensions. + + This is a subroutine of see_merge_and_eliminate_extensions called + via splay_tree_foreach. + STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a + see_ref_s structure. */ + +static int +see_handle_extensions_for_one_ref (splay_tree_node stn, + void *data ATTRIBUTE_UNUSED) +{ + htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; + htab_t unmerged_def_se_hash = + ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; + htab_t merged_def_se_hash = NULL; + rtx ref = ((struct see_ref_s *) (stn->value))->insn; + + if (dump_file) + { + fprintf (dump_file, "Handling ref:\n"); + print_rtl_single (dump_file, ref); + } + + /* a. Try to eliminate only def extensions, one by one. */ + if (unmerged_def_se_hash) + htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension, + (PTR) (stn->value)); + + if (use_se_hash) + /* b. Try to eliminate only use extensions, one by one. */ + htab_traverse_noresize (use_se_hash, see_merge_one_use_extension, + (PTR) (stn->value)); + + merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash; + + if (dump_file) + { + fprintf (dump_file, "The hashes of the current reference:\n"); + if (unmerged_def_se_hash) + { + fprintf (dump_file, "unmerged_def_se_hash:\n"); + htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL); + } + if (merged_def_se_hash) + { + fprintf (dump_file, "merged_def_se_hash:\n"); + htab_traverse (merged_def_se_hash, see_print_one_extension, NULL); + } + if (use_se_hash) + { + fprintf (dump_file, "use_se_hash:\n"); + htab_traverse (use_se_hash, see_print_one_extension, NULL); + } + } + + /* Now that all the merges are done, update the register properties of the + basic block and eliminate locally redundant extensions. + It is important that we first traverse the use extensions hash and + afterwards the def extensions hashes. */ + + if (use_se_hash) + htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use, + (PTR) (stn->value)); + + if (unmerged_def_se_hash) + htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def, + (PTR) (stn->value)); + + if (merged_def_se_hash) + htab_traverse (merged_def_se_hash, see_set_prop_merged_def, + (PTR) (stn->value)); + + /* Continue to the next definition. */ + return 0; +} + + +/* Phase 2 top level function. + In this phase, we try to merge def extensions and use extensions with their + references, and eliminate redundant extensions in the same basic block. + We also gather information for the next phases. */ + +static void +see_merge_and_eliminate_extensions (void) +{ + int i = 0; + + if (dump_file) + fprintf (dump_file, + "* Phase 2: Merge and eliminate locally redundant extensions. *\n"); + + /* Traverse over all the splay trees of the basic blocks. */ + for (i = 0; i < last_bb; i++) + { + if (see_bb_splay_ar[i]) + { + if (dump_file) + fprintf (dump_file, "Handling references for bb %d\n", i); + /* Traverse over all the references in the basic block in forward + order. */ + splay_tree_foreach (see_bb_splay_ar[i], + see_handle_extensions_for_one_ref, NULL); + } + } +} + + +/* Phase 1 implementation: Propagate extensions to uses. */ + +/* Insert REF_INSN into the splay tree of its basic block. + SE_INSN is the extension to store in the proper hash according to TYPE. + + Return true if everything went well. + Otherwise, return false (this will cause the optimization to be aborted). */ + +static bool +see_store_reference_and_extension (rtx ref_insn, rtx se_insn, + enum extension_type type) +{ + rtx *rtx_slot = NULL; + int curr_bb_num; + splay_tree_node stn = NULL; + htab_t se_hash = NULL; + struct see_ref_s *ref_s = NULL; + + /* Check the arguments. */ + gcc_assert (ref_insn && se_insn); + if (!see_bb_splay_ar) + return false; + + curr_bb_num = BLOCK_NUM (ref_insn); + gcc_assert ((curr_bb_num < last_bb) && (curr_bb_num >= 0)); + + /* Insert the reference to the splay tree of its basic block. */ + if (!see_bb_splay_ar[curr_bb_num]) + /* The splay tree for this block doesn't exist yet, create it. */ + see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints, + NULL, see_free_ref_s); + else + /* Splay tree already exists, check if the current reference is already + in it. */ + { + stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num], + DF_INSN_LUID (df, ref_insn)); + if (stn) + { + switch (type) + { + case EXPLICIT_DEF_EXTENSION: + se_hash = + ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; + if (!se_hash) + { + se_hash = htab_create (10, hash_descriptor_extension, + eq_descriptor_extension, NULL); + ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash = + se_hash; + } + break; + case IMPLICIT_DEF_EXTENSION: + se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash; + if (!se_hash) + { + se_hash = htab_create (10, hash_descriptor_extension, + eq_descriptor_extension, NULL); + ((struct see_ref_s *) (stn->value))->merged_def_se_hash = + se_hash; + } + break; + case USE_EXTENSION: + se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; + if (!se_hash) + { + se_hash = htab_create (10, hash_descriptor_extension, + eq_descriptor_extension, NULL); + ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash; + } + break; + default: + gcc_unreachable (); + } + } + } + + /* Initialize a new see_ref_s structure and insert it to the splay + tree. */ + if (!stn) + { + ref_s = xmalloc (sizeof (struct see_ref_s)); + ref_s->luid = DF_INSN_LUID (df, ref_insn); + ref_s->insn = ref_insn; + ref_s->merged_insn = NULL; + + /* Initialize the hashes. */ + switch (type) + { + case EXPLICIT_DEF_EXTENSION: + ref_s->unmerged_def_se_hash = + htab_create (10, hash_descriptor_extension, eq_descriptor_extension, + NULL); + se_hash = ref_s->unmerged_def_se_hash; + ref_s->merged_def_se_hash = NULL; + ref_s->use_se_hash = NULL; + break; + case IMPLICIT_DEF_EXTENSION: + ref_s->merged_def_se_hash = + htab_create (10, hash_descriptor_extension, eq_descriptor_extension, + NULL); + se_hash = ref_s->merged_def_se_hash; + ref_s->unmerged_def_se_hash = NULL; + ref_s->use_se_hash = NULL; + break; + case USE_EXTENSION: + ref_s->use_se_hash = + htab_create (10, hash_descriptor_extension, eq_descriptor_extension, + NULL); + se_hash = ref_s->use_se_hash; + ref_s->unmerged_def_se_hash = NULL; + ref_s->merged_def_se_hash = NULL; + break; + default: + gcc_unreachable (); + } + } + + /* Insert the new extension instruction into the correct se_hash of the + current reference. */ + rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT); + if (*rtx_slot != NULL) + { + gcc_assert (type == USE_EXTENSION); + gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn))); + } + else + *rtx_slot = se_insn; + + /* If this is a new reference, insert it into the splay_tree. */ + if (!stn) + splay_tree_insert (see_bb_splay_ar[curr_bb_num], + DF_INSN_LUID (df, ref_insn), (splay_tree_value) ref_s); + return true; +} + + +/* Go over all the defs, for each relevant definition (defined below) store its + instruction as a reference. + + A definition is relevant if its root has + ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and + his source_mode is not narrower then the the roots source_mode. + + Return the number of relevant defs or negative number if something bad had + happened and the optimization should be aborted. */ + +static int +see_handle_relevant_defs (void) +{ + rtx insn = NULL; + rtx se_insn = NULL; + rtx reg = NULL; + rtx ref_insn = NULL; + struct web_entry *root_entry = NULL; + unsigned int i; + int num_relevant_defs = 0; + enum rtx_code extension_code; + + for (i = 0; i < defs_num; i++) + { + insn = DF_REF_INSN (DF_DEFS_GET (df, i)); + reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i)); + + if (!insn) + continue; + + if (!INSN_P (insn)) + continue; + + root_entry = unionfind_root (&def_entry[i]); + + if ((ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF) + && (ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)) + /* The current web is not relevant. Continue to the next def. */ + continue; + + if (root_entry->reg) + /* It isn't possible to have two different register for the same + web. */ + gcc_assert (rtx_equal_p (root_entry->reg, reg)); + else + root_entry->reg = reg; + + /* The current definition is an EXTENDED_DEF or a definition that its + source_mode is narrower then its web's source_mode. + This means that we need to generate the implicit extension explicitly + and store it in the current reference's merged_def_se_hash. */ + if ((ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF) + || (ENTRY_EI (&def_entry[i])->local_source_mode < + ENTRY_EI (root_entry)->source_mode)) + { + num_relevant_defs++; + + if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF) + extension_code = SIGN_EXTEND; + else + extension_code = ZERO_EXTEND; + + se_insn = + see_gen_normalized_extension (reg, extension_code, + ENTRY_EI (root_entry)->source_mode); + + /* This is a dummy extension, mark it as deleted. */ + INSN_DELETED_P (se_insn) = 1; + + if (!see_store_reference_and_extension (insn, se_insn, + IMPLICIT_DEF_EXTENSION)) + /* Something bad happened. Abort the optimization. */ + return -1; + continue; + } + + ref_insn = PREV_INSN (insn); + gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn)); + + num_relevant_defs++; + + if (!see_store_reference_and_extension (ref_insn, insn, + EXPLICIT_DEF_EXTENSION)) + /* Something bad happened. Abort the optimization. */ + return -1; + } + return num_relevant_defs; +} + + +/* Go over all the uses, for each use in relevant web store its instruction as + a reference and generate an extension before it. + + Return the number of relevant uses or negative number if something bad had + happened and the optimization should be aborted. */ + +static int +see_handle_relevant_uses (void) +{ + rtx insn = NULL; + rtx reg = NULL; + struct web_entry *root_entry = NULL; + rtx se_insn = NULL; + unsigned int i; + int num_relevant_uses = 0; + enum rtx_code extension_code; + + for (i = 0; i < uses_num; i++) + { + insn = DF_REF_INSN (DF_USES_GET (df, i)); + reg = DF_REF_REAL_REG (DF_USES_GET (df, i)); + + if (!insn) + continue; + + if (!INSN_P (insn)) + continue; + + root_entry = unionfind_root (&use_entry[i]); + + if ((ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF) + && (ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)) + /* The current web is not relevant. Continue to the next use. */ + continue; + + if (root_entry->reg) + /* It isn't possible to have two different register for the same + web. */ + gcc_assert (rtx_equal_p (root_entry->reg, reg)); + else + root_entry->reg = reg; + + /* Generate the use extension. */ + if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF) + extension_code = SIGN_EXTEND; + else + extension_code = ZERO_EXTEND; + + se_insn = + see_gen_normalized_extension (reg, extension_code, + ENTRY_EI (root_entry)->source_mode); + if (!se_insn) + /* This is very bad, abort the transformation. */ + return -1; + + num_relevant_uses++; + + if (!see_store_reference_and_extension (insn, se_insn, + USE_EXTENSION)) + /* Something bad happened. Abort the optimization. */ + return -1; + } + + return num_relevant_uses; +} + + +/* Updates the relevancy of all the uses. + The information of the i'th use is stored in use_entry[i]. + Currently all the uses are relevant for the optimization except for uses that + are in LIBCALL or RETVAL instructions. */ + +static void +see_update_uses_relevancy (void) +{ + rtx insn = NULL; + rtx reg = NULL; + struct see_entry_extra_info *curr_entry_extra_info; + enum entry_type et; + unsigned int i; + + if (!df || !use_entry) + return; + + for (i = 0; i < uses_num; i++) + { + + insn = DF_REF_INSN (DF_USES_GET (df, i)); + reg = DF_REF_REAL_REG (DF_USES_GET (df, i)); + + et = RELEVANT_USE; + + if (insn) + { + if (!INSN_P (insn)) + et = NOT_RELEVANT; + if (insn && find_reg_note (insn, REG_LIBCALL, NULL_RTX)) + et = NOT_RELEVANT; + if (find_reg_note (insn, REG_RETVAL, NULL_RTX)) + et = NOT_RELEVANT; + } + else + et = NOT_RELEVANT; + + if (dump_file) + { + fprintf (dump_file, "u%i insn %i reg %i ", + i, (insn ? INSN_UID (insn) : -1), REGNO (reg)); + if (et == NOT_RELEVANT) + fprintf (dump_file, "NOT RELEVANT \n"); + else + fprintf (dump_file, "RELEVANT USE \n"); + } + + curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info)); + curr_entry_extra_info->relevancy = et; + curr_entry_extra_info->local_relevancy = et; + use_entry[i].extra_info = curr_entry_extra_info; + use_entry[i].reg = NULL; + use_entry[i].pred = NULL; + } +} + + +/* A definition in a candidate for this optimization only if its pattern is + recognized as relevant in this function. + INSN is the instruction to be recognized. + +- If this is the pattern of a common sign extension after definition: + PREV_INSN (INSN): def (reg:NARROWmode r) + INSN: set ((reg:WIDEmode r') + (sign_extend:WIDEmode (reg:NARROWmode r))) + return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode. + +- If this is the pattern of a common zero extension after definition: + PREV_INSN (INSN): def (reg:NARROWmode r) + INSN: set ((reg:WIDEmode r') + (zero_extend:WIDEmode (reg:NARROWmode r))) + return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode. + +- Otherwise, + + For the pattern: + INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...))) + return EXTENDED_DEF and set SOURCE_MODE to the mode of expr. + + For the pattern: + INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...))) + return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr. + + For the pattern: + INSN: set ((reg:WIDEmode r) (CONST_INT (...))) + return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that + is implicitly sign(zero) extended to WIDEmode in the INSN. + +- FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF + that is part of a PARALLEL instruction are not handled. + These restriction can be relaxed. */ + +static enum entry_type +see_analyze_one_def (rtx insn, enum machine_mode *source_mode, + enum machine_mode *source_mode_unsigned) +{ + enum rtx_code extension_code; + rtx rhs = NULL; + rtx lhs = NULL; + rtx set = NULL; + rtx source_register = NULL; + rtx prev_insn = NULL; + rtx next_insn = NULL; + enum machine_mode mode; + enum machine_mode next_source_mode; + HOST_WIDE_INT val = 0; + HOST_WIDE_INT val2 = 0; + int i = 0; + + *source_mode = MAX_MACHINE_MODE; + *source_mode_unsigned = MAX_MACHINE_MODE; + + if (!insn) + return NOT_RELEVANT; + + if (!INSN_P (insn)) + return NOT_RELEVANT; + + extension_code = see_get_extension_data (insn, source_mode); + switch (extension_code) + { + case SIGN_EXTEND: + case ZERO_EXTEND: + source_register = see_get_extension_reg (insn, 0); + /* FIXME: This restriction can be relaxed. The only thing that is + important is that the reference would be inside the same basic block + as the extension. */ + prev_insn = PREV_INSN (insn); + if (!prev_insn || !INSN_P (prev_insn)) + return NOT_RELEVANT; + + if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn)) + return NOT_RELEVANT; + + if (find_reg_note (prev_insn, REG_LIBCALL, NULL_RTX)) + return NOT_RELEVANT; + + if (find_reg_note (prev_insn, REG_RETVAL, NULL_RTX)) + return NOT_RELEVANT; + + /* If we can't use copy_rtx on the reference it can't be a reference. */ + if ((GET_CODE (PATTERN (prev_insn)) == PARALLEL) + && (asm_noperands (PATTERN (prev_insn)) >= 0)) + return NOT_RELEVANT; + + /* Now, check if this extension is a reference itself. If so, it is not + relevant. Handling this extension as relevant would make things much + more complicated. */ + next_insn = NEXT_INSN (insn); + if (prev_insn + && INSN_P (prev_insn) + && (see_get_extension_data (next_insn, &next_source_mode) != + NOT_RELEVANT)) + { + rtx curr_dest_register = see_get_extension_reg (insn, 1); + rtx next_source_register = see_get_extension_reg (next_insn, 0); + + if (REGNO (curr_dest_register) == REGNO (next_source_register)) + return NOT_RELEVANT; + } + + if (extension_code == SIGN_EXTEND) + return SIGN_EXTENDED_DEF; + else + return ZERO_EXTENDED_DEF; + + case UNKNOWN: + /* This may still be an EXTENDED_DEF. */ + + /* FIXME: This restriction can be relaxed. It is possible to handle + PARALLEL insns too. */ + set = single_set (insn); + if (!set) + return NOT_RELEVANT; + rhs = SET_SRC (set); + lhs = SET_DEST (set); + + /* Don't handle extensions to something other then register or + subregister. */ + if (!REG_P (lhs) && !SUBREG_REG (lhs)) + return NOT_RELEVANT; + + switch (GET_CODE (rhs)) + { + case (SIGN_EXTEND): + *source_mode = GET_MODE (XEXP (rhs, 0)); + *source_mode_unsigned = MAX_MACHINE_MODE; + return EXTENDED_DEF; + case (ZERO_EXTEND): + *source_mode = MAX_MACHINE_MODE; + *source_mode_unsigned = GET_MODE (XEXP (rhs, 0)); + return EXTENDED_DEF; + case (CONST_INT): + + val = INTVAL (rhs); + + /* Find the narrowest mode, val could fit into. */ + for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0; + GET_MODE_BITSIZE (mode) < BITS_PER_WORD; + mode = GET_MODE_WIDER_MODE (mode), i++) + { + val2 = trunc_int_for_mode (val, mode); + if ((val2 == val) && (*source_mode == MAX_MACHINE_MODE)) + *source_mode = mode; + if ((val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode))) + && (*source_mode_unsigned == MAX_MACHINE_MODE)) + *source_mode_unsigned = mode; + if ((*source_mode != MAX_MACHINE_MODE) + && (*source_mode_unsigned !=MAX_MACHINE_MODE)) + return EXTENDED_DEF; + } + if ((*source_mode != MAX_MACHINE_MODE) + || (*source_mode_unsigned !=MAX_MACHINE_MODE)) + return EXTENDED_DEF; + return NOT_RELEVANT; + default: + return NOT_RELEVANT; + } + default: + gcc_unreachable (); + } +} + + +/* Updates the relevancy and source_mode of all the definitions. + The information of the i'th definition is stored in def_entry[i]. */ + +static void +see_update_defs_relevancy (void) +{ + struct see_entry_extra_info *curr_entry_extra_info; + unsigned int i; + rtx insn = NULL; + rtx reg = NULL; + enum entry_type et; + enum machine_mode source_mode; + enum machine_mode source_mode_unsigned; + + if (!df || !def_entry) + return; + + for (i = 0; i < defs_num; i++) + { + insn = DF_REF_INSN (DF_DEFS_GET (df, i)); + reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i)); + + et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned); + + curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info)); + curr_entry_extra_info->relevancy = et; + curr_entry_extra_info->local_relevancy = et; + if (et != EXTENDED_DEF) + { + curr_entry_extra_info->source_mode = source_mode; + curr_entry_extra_info->local_source_mode = source_mode; + } + else + { + curr_entry_extra_info->source_mode_signed = source_mode; + curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned; + } + def_entry[i].extra_info = curr_entry_extra_info; + def_entry[i].reg = NULL; + def_entry[i].pred = NULL; + + if (dump_file) + { + if (et == NOT_RELEVANT) + { + fprintf (dump_file, "d%i insn %i reg %i ", + i, (insn ? INSN_UID (insn) : -1), REGNO (reg)); + fprintf (dump_file, "NOT RELEVANT \n"); + } + else + { + fprintf (dump_file, "d%i insn %i reg %i ", + i ,INSN_UID (insn), REGNO (reg)); + fprintf (dump_file, "RELEVANT - "); + switch (et) + { + case SIGN_EXTENDED_DEF : + fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n", + GET_MODE_NAME (source_mode)); + break; + case ZERO_EXTENDED_DEF : + fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n", + GET_MODE_NAME (source_mode)); + break; + case EXTENDED_DEF : + fprintf (dump_file, "EXTENDED_DEF, "); + if ((source_mode != MAX_MACHINE_MODE) + && (source_mode_unsigned != MAX_MACHINE_MODE)) + { + fprintf (dump_file, "positive const, "); + fprintf (dump_file, "source_mode_signed = %s, ", + GET_MODE_NAME (source_mode)); + fprintf (dump_file, "source_mode_unsigned = %s\n", + GET_MODE_NAME (source_mode_unsigned)); + } + else if (source_mode != MAX_MACHINE_MODE) + fprintf (dump_file, "source_mode_signed = %s\n", + GET_MODE_NAME (source_mode)); + else + fprintf (dump_file, "source_mode_unsigned = %s\n", + GET_MODE_NAME (source_mode_unsigned)); + break; + default : + gcc_unreachable (); + } + } + } + } +} + + +/* Phase 1 top level function. + In this phase the relevancy of all the definitions and uses are checked, + later the webs are produces and the extensions are generated. + These extensions are not emitted yet into the insns stream. + + returns true if at list one relevant web was found and there were no + problems, otherwise return false. */ + +static bool +see_propagate_extensions_to_uses (void) +{ + unsigned int i = 0; + int num_relevant_uses; + int num_relevant_defs; + + if (dump_file) + fprintf (dump_file, + "* Phase 1: Propagate extensions to uses. *\n"); + + /* Update the relevancy of references using the DF object. */ + see_update_defs_relevancy (); + see_update_uses_relevancy (); + + /* Produce the webs and update the extra_info of the root. + In general, a web is relevant if all its definitions and uses are relevant + and there is at least one definition that was marked as SIGN_EXTENDED_DEF + or ZERO_EXTENDED_DEF. */ + for (i = 0; i < uses_num; i++) + { + union_defs (df, DF_USES_GET (df, i), def_entry, use_entry, + see_update_leader_extra_info); + } + + /* Generate use extensions for references and insert these + references to see_bb_splay_ar data structure. */ + num_relevant_uses = see_handle_relevant_uses (); + + if (num_relevant_uses < 0) + return false; + + /* Store the def extensions in their references structures and insert these + references to see_bb_splay_ar data structure. */ + num_relevant_defs = see_handle_relevant_defs (); + + if (num_relevant_defs < 0) + return false; + + return ((num_relevant_uses > 0) || (num_relevant_defs > 0)); +} + + +/* Main entry point for the sign extension elimination optimization. */ + +void +see_main (void) +{ + bool cont = false; + int i = 0; + + /* Initialize global data structures. */ + see_initialize_data_structures (); + + /* Phase 1: Propagate extensions to uses. */ + cont = see_propagate_extensions_to_uses (); + + if (cont) + { + init_recog (); + + /* Phase 2: Merge and eliminate locally redundant extensions. */ + see_merge_and_eliminate_extensions (); + + /* Phase 3: Eliminate globally redundant extensions. */ + see_execute_LCM (); + + /* Phase 4: Commit changes to the insn stream. */ + see_commit_changes (); + + if (dump_file) + { + /* For debug purpose only. */ + fprintf (dump_file, "see_pre_extension_hash:\n"); + htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr, + NULL); + + for (i = 0; i < last_bb; i++) + { + if (see_bb_hash_ar[i]) + /* Traverse over all the references in the basic block in + forward order. */ + { + fprintf (dump_file, + "Searching register properties in bb %d\n", i); + htab_traverse (see_bb_hash_ar[i], + see_print_register_properties, NULL); + } + } + } + } + + /* Free global data structures. */ + see_free_data_structures (); +} + + +static bool +gate_handle_see (void) +{ + return ((optimize > 1) && flag_see); +} + +static unsigned int +rest_of_handle_see (void) +{ + int no_new_pseudos_bcp = no_new_pseudos; + + no_new_pseudos = 0; + see_main (); + no_new_pseudos = no_new_pseudos_bcp; + + delete_trivially_dead_insns (get_insns (), max_reg_num ()); + update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES, + (PROP_DEATH_NOTES)); + cleanup_cfg (CLEANUP_EXPENSIVE); + reg_scan (get_insns (), max_reg_num ()); + + return 0; +} + +struct tree_opt_pass pass_see = +{ + "see", /* name */ + gate_handle_see, /* gate */ + rest_of_handle_see, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_SEE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 'u' /* letter */ +}; + diff --git a/gcc/timevar.def b/gcc/timevar.def index 32388e8..28b0b76 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -145,6 +145,7 @@ DEFTIMEVAR (TV_VPT , "value profile opts") DEFTIMEVAR (TV_FLOW , "flow analysis") DEFTIMEVAR (TV_COMBINE , "combiner") DEFTIMEVAR (TV_IFCVT , "if-conversion") +DEFTIMEVAR (TV_SEE , "see") DEFTIMEVAR (TV_REGMOVE , "regmove") DEFTIMEVAR (TV_MODE_SWITCH , "mode switching") DEFTIMEVAR (TV_SMS , "sms modulo scheduling") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 134c457..3121735 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -356,6 +356,7 @@ extern struct tree_opt_pass pass_partition_blocks; extern struct tree_opt_pass pass_regmove; extern struct tree_opt_pass pass_split_all_insns; extern struct tree_opt_pass pass_mode_switching; +extern struct tree_opt_pass pass_see; extern struct tree_opt_pass pass_recompute_reg_usage; extern struct tree_opt_pass pass_sms; extern struct tree_opt_pass pass_sched; diff --git a/gcc/web.c b/gcc/web.c index b729be7..7f5ae67 100644 --- a/gcc/web.c +++ b/gcc/web.c @@ -61,25 +61,12 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "tree-pass.h" -/* This entry is allocated for each reference in the insn stream. */ -struct web_entry -{ - /* Pointer to the parent in the union/find tree. */ - struct web_entry *pred; - /* Newly assigned register to the entry. Set only for roots. */ - rtx reg; -}; - -static struct web_entry *unionfind_root (struct web_entry *); -static void unionfind_union (struct web_entry *, struct web_entry *); -static void union_defs (struct df *, struct df_ref *, struct web_entry *, - struct web_entry *); static rtx entry_register (struct web_entry *, struct df_ref *, char *); static void replace_ref (struct df_ref *, rtx); /* Find the root of unionfind tree (the representative of set). */ -static struct web_entry * +struct web_entry * unionfind_root (struct web_entry *element) { struct web_entry *element1 = element, *element2; @@ -95,30 +82,48 @@ unionfind_root (struct web_entry *element) return element; } -/* Union sets. */ +/* Union sets. + Return true if FIRST and SECOND points to the same web entry structure and + nothing is done. Otherwise, return false. */ -static void +bool unionfind_union (struct web_entry *first, struct web_entry *second) { first = unionfind_root (first); second = unionfind_root (second); if (first == second) - return; + return true; second->pred = first; + return false; } /* For each use, all possible defs reaching it must come in the same - register, union them. */ + register, union them. + FUN is the function that does the union. */ -static void +void union_defs (struct df *df, struct df_ref *use, struct web_entry *def_entry, - struct web_entry *use_entry) + struct web_entry *use_entry, + bool (*fun) (struct web_entry *, struct web_entry *)) { rtx insn = DF_REF_INSN (use); struct df_link *link = DF_REF_CHAIN (use); - struct df_ref *use_link = DF_INSN_USES (df, insn); - struct df_ref *def_link = DF_INSN_DEFS (df, insn); - rtx set = single_set (insn); + struct df_ref *use_link; + struct df_ref *def_link; + rtx set; + + if (insn) + { + use_link = DF_INSN_USES (df, insn); + def_link = DF_INSN_DEFS (df, insn); + set = single_set (insn); + } + else + { + use_link = NULL; + def_link = NULL; + set = NULL; + } /* Some instructions may use match_dup for their operands. In case the operands are dead, we will assign them different pseudos, creating @@ -129,8 +134,8 @@ union_defs (struct df *df, struct df_ref *use, struct web_entry *def_entry, { if (use != use_link && DF_REF_REAL_REG (use) == DF_REF_REAL_REG (use_link)) - unionfind_union (use_entry + DF_REF_ID (use), - use_entry + DF_REF_ID (use_link)); + (*fun) (use_entry + DF_REF_ID (use), + use_entry + DF_REF_ID (use_link)); use_link = use_link->next_ref; } @@ -145,15 +150,15 @@ union_defs (struct df *df, struct df_ref *use, struct web_entry *def_entry, while (def_link) { if (DF_REF_REAL_REG (use) == DF_REF_REAL_REG (def_link)) - unionfind_union (use_entry + DF_REF_ID (use), - def_entry + DF_REF_ID (def_link)); + (*fun) (use_entry + DF_REF_ID (use), + def_entry + DF_REF_ID (def_link)); def_link = def_link->next_ref; } } while (link) { - unionfind_union (use_entry + DF_REF_ID (use), - def_entry + DF_REF_ID (link->ref)); + (*fun) (use_entry + DF_REF_ID (use), + def_entry + DF_REF_ID (link->ref)); link = link->next; } @@ -161,13 +166,18 @@ union_defs (struct df *df, struct df_ref *use, struct web_entry *def_entry, register. Find it and union. */ if (use->flags & DF_REF_READ_WRITE) { - struct df_ref *link = DF_INSN_DEFS (df, DF_REF_INSN (use)); + struct df_ref *link; + + if (DF_REF_INSN (use)) + link = DF_INSN_DEFS (df, DF_REF_INSN (use)); + else + link = NULL; while (link) { if (DF_REF_REAL_REG (link) == DF_REF_REAL_REG (use)) - unionfind_union (use_entry + DF_REF_ID (use), - def_entry + DF_REF_ID (link)); + (*fun) (use_entry + DF_REF_ID (use), + def_entry + DF_REF_ID (link)); link = link->next_ref; } } @@ -258,7 +268,7 @@ web_main (void) /* Produce the web. */ for (i = 0; i < DF_USES_SIZE (df); i++) - union_defs (df, DF_USES_GET (df, i), def_entry, use_entry); + union_defs (df, DF_USES_GET (df, i), def_entry, use_entry, unionfind_union); /* Update the instruction stream, allocating new registers for split pseudos in progress. */