From: Zack Weinberg Date: Mon, 10 Sep 2001 22:34:03 +0000 (+0000) Subject: cpplex.c (parse_identifier): Fast-path optimize. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2c3fcba6dc56f848a07332ff005e2db9054dca26;p=platform%2Fupstream%2Fgcc.git cpplex.c (parse_identifier): Fast-path optimize. * cpplex.c (parse_identifier): Fast-path optimize. Avoid copying identifier when we're just going to throw it away. (parse_identifier_slow): New routine to handle abnormal cases. (_cpp_lex_token): Update call site. * hashtable.c (ht_lookup): Don't assume that the string we've been given is NUL-terminated. * system.h: #define __builtin_expect(a, b) to (a) if not GCC >=3.0. From-SVN: r45529 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b2edebd..fed56b5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2001-09-10 Zack Weinberg + + * cpplex.c (parse_identifier): Fast-path optimize. Avoid + copying identifier when we're just going to throw it away. + (parse_identifier_slow): New routine to handle abnormal cases. + (_cpp_lex_token): Update call site. + + * hashtable.c (ht_lookup): Don't assume that the string we've + been given is NUL-terminated. + * system.h: #define __builtin_expect(a, b) to (a) if not + GCC >=3.0. + 2001-09-10 Michael Meissner * config.gcc (sparc64-*-solaris2): Add alias to be compatible with @@ -16,7 +28,7 @@ Mon Sep 10 16:26:44 2001 Richard Kenner * dwarf2out.c (incomplete_types, decl_scope_table): Make them into varray's and register them as roots with the garbage - collector so they are not collected too soon. + collector so they are not collected too soon. Mon Sep 10 14:21:26 CEST 2001 Jan Hubicka @@ -31,7 +43,7 @@ Mon Sep 10 14:21:26 CEST 2001 Jan Hubicka (basic_block_for_insn, label_value_list): Move from flow.c; make global. (n_basic_blocks, n_edges, basic_block_info, entry_exit_blocks, init_flow, clear_edges, can_delete_note_p, can_delete_label_p, - flow_delete_insn, flow_delete_insn_chain, create_basic_block, + flow_delete_insn, flow_delete_insn_chain, create_basic_block, expunge_block, flow_delete_block, compute_bb_for_insn, update_bb_for_insn, set_block_for_insn, set_block_for_new_insns, make_edge, remove_edge, redirect_edge_succ, redirect_edge_succ_nodup, @@ -40,7 +52,7 @@ Mon Sep 10 14:21:26 CEST 2001 Jan Hubicka redirect_edge_and_branch, redirect_edge_and_branch_force, tidy_fallthru_edge, tidy_fallthru_edges, back_edge_of_syntactic_loop_p, split_edge, insert_insn_on_edge, commit_one_edge_insertion, - commit_edge_insertions, dump_flow_info, debug_flow_info, + commit_edge_insertions, dump_flow_info, debug_flow_info, dump_edge_info, dump_bb, debug_bb, debug_bb_n, print_rtl_with_bb, verify_flow_info, purge_dead_edges, purge_all_dead_edges): Move here from flow.c diff --git a/gcc/cpplex.c b/gcc/cpplex.c index 03bd855..071cdca 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -88,7 +88,9 @@ static int skip_block_comment PARAMS ((cpp_reader *)); static int skip_line_comment PARAMS ((cpp_reader *)); static void adjust_column PARAMS ((cpp_reader *)); static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t)); -static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t)); +static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *)); +static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *, + const U_CHAR *)); static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int)); static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *)); static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t)); @@ -470,40 +472,101 @@ name_p (pfile, string) return 1; } -/* Parse an identifier, skipping embedded backslash-newlines. - Calculate the hash value of the token while parsing, for improved - performance. The hashing algorithm *must* match cpp_lookup(). */ +/* Parse an identifier, skipping embedded backslash-newlines. This is + a critical inner loop. The common case is an identifier which has + not been split by backslash-newline, does not contain a dollar + sign, and has already been scanned (roughly 10:1 ratio of + seen:unseen identifiers in normal code; the distribution is + Poisson-like). Second most common case is a new identifier, not + split and no dollar sign. The other possibilities are rare and + have been relegated to parse_identifier_slow. */ static cpp_hashnode * -parse_identifier (pfile, c) +parse_identifier (pfile) cpp_reader *pfile; - cppchar_t c; { cpp_hashnode *result; + const U_CHAR *cur, *rlimit; + + /* Fast-path loop. Skim over a normal identifier. + N.B. ISIDNUM does not include $. */ + cur = pfile->buffer->cur - 1; + rlimit = pfile->buffer->rlimit; + do + cur++; + while (cur < rlimit && ISIDNUM (*cur)); + + /* Check for slow-path cases. */ + if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$')) + result = parse_identifier_slow (pfile, cur); + else + { + const U_CHAR *base = pfile->buffer->cur - 1; + result = (cpp_hashnode *) + ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); + pfile->buffer->cur = cur; + } + + /* Rarely, identifiers require diagnostics when lexed. + XXX Has to be forced out of the fast path. */ + if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) + && !pfile->state.skipping, 0)) + { + /* It is allowed to poison the same identifier twice. */ + if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) + cpp_error (pfile, "attempt to use poisoned \"%s\"", + NODE_NAME (result)); + + /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the + replacement list of a variadic macro. */ + if (result == pfile->spec_nodes.n__VA_ARGS__ + && !pfile->state.va_args_ok) + cpp_pedwarn (pfile, + "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro"); + } + + return result; +} + +/* Slow path. This handles identifiers which have been split, and + identifiers which contain dollar signs. The part of the identifier + from PFILE->buffer->cur-1 to CUR has already been scanned. */ +static cpp_hashnode * +parse_identifier_slow (pfile, cur) + cpp_reader *pfile; + const U_CHAR *cur; +{ cpp_buffer *buffer = pfile->buffer; - unsigned int saw_dollar = 0, len; + const U_CHAR *base = buffer->cur - 1; struct obstack *stack = &pfile->hash_table->stack; + unsigned int c, saw_dollar = 0, len; + + /* Copy the part of the token which is known to be okay. */ + obstack_grow (stack, base, cur - base); + /* Now process the part which isn't. We are looking at one of + '$', '\\', or '?' on entry to this loop. */ + c = *cur++; + buffer->cur = cur; do { - do - { - obstack_1grow (stack, c); + while (is_idchar (c)) + { + obstack_1grow (stack, c); - if (c == '$') - saw_dollar++; + if (c == '$') + saw_dollar++; - c = EOF; - if (buffer->cur == buffer->rlimit) - break; + c = EOF; + if (buffer->cur == buffer->rlimit) + break; - c = *buffer->cur++; - } - while (is_idchar (c)); + c = *buffer->cur++; + } /* Potential escaped newline? */ if (c != '?' && c != '\\') - break; + break; c = skip_escaped_newlines (pfile, c); } while (is_idchar (c)); @@ -521,26 +584,8 @@ parse_identifier (pfile, c) len = obstack_object_size (stack); obstack_1grow (stack, '\0'); - /* This routine commits the memory if necessary. */ - result = (cpp_hashnode *) + return (cpp_hashnode *) ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED); - - /* Some identifiers require diagnostics when lexed. */ - if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping) - { - /* It is allowed to poison the same identifier twice. */ - if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) - cpp_error (pfile, "attempt to use poisoned \"%s\"", - NODE_NAME (result)); - - /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the - replacement list of a variadic macro. */ - if (result == pfile->spec_nodes.n__VA_ARGS__ - && !pfile->state.va_args_ok) - cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro"); - } - - return result; } /* Parse a number, skipping embedded backslash-newlines. */ @@ -1003,14 +1048,17 @@ _cpp_lex_token (pfile, result) case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': result->type = CPP_NAME; - result->val.node = parse_identifier (pfile, c); + result->val.node = parse_identifier (pfile); /* 'L' may introduce wide characters or strings. */ if (result->val.node == pfile->spec_nodes.n_L) { - c = buffer->read_ahead; /* For make_string. */ + c = buffer->read_ahead; + if (c == EOF && buffer->cur < buffer->rlimit) + c = *buffer->cur; if (c == '\'' || c == '"') { + buffer->cur++; ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR); goto make_string; } diff --git a/gcc/hashtable.c b/gcc/hashtable.c index 7d0359a..bd2b137 100644 --- a/gcc/hashtable.c +++ b/gcc/hashtable.c @@ -162,7 +162,7 @@ ht_lookup (table, str, len, insert) HT_LEN (node) = len; if (insert == HT_ALLOC) - HT_STR (node) = obstack_copy (&table->stack, str, len + 1); + HT_STR (node) = obstack_copy0 (&table->stack, str, len); else HT_STR (node) = str; diff --git a/gcc/system.h b/gcc/system.h index 6b58083..7938fb6 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -490,6 +490,13 @@ extern void abort PARAMS ((void)); #endif /* ! __FUNCTION__ */ #endif +/* __builtin_expect(A, B) evaluates to A, but notifies the compiler that + the most likely value of A is B. This feature was added at some point + between 2.95 and 3.0. Let's use 3.0 as the lower bound for now. */ +#if (GCC_VERSION < 3000) +#define __builtin_expect(a, b) (a) +#endif + /* Provide some sort of boolean type. We use stdbool.h if it's available. This must be after all inclusion of system headers, as some of them will mess us up. */