From f39eef7b5d2539cffa1149b0bbeb75f513a4ace3 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 6 Jan 2004 22:12:27 +0000 Subject: [PATCH] Update. 2004-01-05 Jakub Jelinek * posix/regcomp.c (regcomp): Fix comment typo. (regfree): Free preg->translate, clear buffer, allocated, fastmap and translate fields. * posix/regcomp.c (build_charclass, build_charclass_op): Change first argument to unsigned RE_TRANSLATE_TYPE. * posix/regex_internal.h (re_string_t): Change trans type to unsigned RE_TRANSLATE_TYPE. * posix/regex_internal.c (re_string_construct_common): Cast trans to unsigned RE_TRANSLATE_TYPE. (re_string_peek_byte_case, re_string_fetch_byte_case): Avoid fast path if pstr->trans. Never translate the character through pstr->trans. * posix/Makefile (tests): Add bug-regex22. (bug-regex22-ENV): Set. * posix/bug-regex22.c: New test. --- ChangeLog | 19 +++++++++ posix/Makefile | 7 ++-- posix/bug-regex22.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++ posix/regex_internal.c | 19 +++------ posix/regex_internal.h | 2 +- 5 files changed, 138 insertions(+), 18 deletions(-) create mode 100644 posix/bug-regex22.c diff --git a/ChangeLog b/ChangeLog index b447f5c..07a016e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2004-01-05 Jakub Jelinek + + * posix/regcomp.c (regcomp): Fix comment typo. + (regfree): Free preg->translate, clear buffer, allocated, fastmap + and translate fields. + + * posix/regcomp.c (build_charclass, build_charclass_op): Change first + argument to unsigned RE_TRANSLATE_TYPE. + * posix/regex_internal.h (re_string_t): Change trans type to + unsigned RE_TRANSLATE_TYPE. + * posix/regex_internal.c (re_string_construct_common): Cast + trans to unsigned RE_TRANSLATE_TYPE. + (re_string_peek_byte_case, re_string_fetch_byte_case): Avoid fast + path if pstr->trans. Never translate the character through + pstr->trans. + * posix/Makefile (tests): Add bug-regex22. + (bug-regex22-ENV): Set. + * posix/bug-regex22.c: New test.
+ 2004-01-02 Paolo Bonzini * posix/regex_internal.c (re_node_set_add_intersect, diff --git a/posix/Makefile b/posix/Makefile index 95f34f9..bfde636 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -1,4 +1,4 @@ -# Copyright (C) 1991-1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. +# Copyright (C) 1991-1999, 2000-2003, 2004 Free Software Foundation, Inc. # This file is part of the GNU C Library. # The GNU C Library is free software; you can redistribute it and/or @@ -79,8 +79,8 @@ tests := tstgetopt testfnm runtests runptests \ bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \ bug-regex13 bug-regex14 bug-regex15 bug-regex16 \ bug-regex17 bug-regex18 bug-regex19 bug-regex20 \ - bug-regex21 tst-nice tst-nanosleep transbug tst-rxspencer \ - tst-pcre tst-boost + bug-regex21 bug-regex22 tst-nice tst-nanosleep \ + transbug tst-rxspencer tst-pcre tst-boost ifeq (yes,$(build-shared)) test-srcs := globtest tests += wordexp-test tst-exec tst-spawn @@ -162,6 +162,7 @@ bug-regex17-ENV = LOCPATH=$(common-objpfx)localedata bug-regex18-ENV = LOCPATH=$(common-objpfx)localedata bug-regex19-ENV = LOCPATH=$(common-objpfx)localedata bug-regex20-ENV = LOCPATH=$(common-objpfx)localedata +bug-regex22-ENV = LOCPATH=$(common-objpfx)localedata tst-rxspencer-ARGS = --utf8 rxspencer/tests tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata tst-pcre-ARGS = PCRE.tests diff --git a/posix/bug-regex22.c b/posix/bug-regex22.c new file mode 100644 index 0000000..4d8357c --- /dev/null +++ b/posix/bug-regex22.c @@ -0,0 +1,109 @@ +/* Test re.translate != NULL. + Copyright (C) 2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek , 2004. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA. */
+
+#include <ctype.h>
+#include <locale.h>
+#include <regex.h>
+#include <stdio.h>
+#include <string.h>
+
+int
+main (void)
+{
+  struct re_pattern_buffer re;
+  char trans[256];
+  int i, result = 0;
+  const char *s;
+
+  setlocale (LC_ALL, "de_DE.ISO-8859-1");
+
+  for (i = 0; i < 256; ++i)
+    trans[i] = tolower (i);
+
+  re_set_syntax (RE_SYNTAX_POSIX_EGREP);
+  /* \W: each search is expected to return the first non-word offset.  */
+  memset (&re, 0, sizeof (re));
+  re.translate = trans;
+  s = re_compile_pattern ("\\W", 2, &re);
+
+  if (s != NULL)
+    {
+      printf ("failed to compile pattern \"\\W\": %s\n", s);
+      result = 1;
+    }
+  else
+    {
+      int ret = re_search (&re, "abc.de", 6, 0, 6, NULL);
+      if (ret != 3)
+        {
+          printf ("1st re_search returned %d\n", ret);
+          result = 1;
+        }
+
+      ret = re_search (&re, "\xc4\xd6\xae\xf7", 4, 0, 4, NULL);
+      if (ret != 2)
+        {
+          printf ("2nd re_search returned %d\n", ret);
+          result = 1;
+        }
+      re.translate = NULL;
+      regfree (&re);
+    }
+  /* \w: each search is expected to return the first word offset.  */
+  memset (&re, 0, sizeof (re));
+  re.translate = trans;
+  s = re_compile_pattern ("\\w", 2, &re);
+
+  if (s != NULL)
+    {
+      printf ("failed to compile pattern \"\\w\": %s\n", s);
+      result = 1;
+    }
+  else
+    {
+      int ret = re_search (&re, ".,!abc", 6, 0, 6, NULL);
+      if (ret != 3)
+        {
+          printf ("3rd re_search returned %d\n", ret);
+          result = 1;
+        }
+
+      ret = re_search (&re, "\xae\xf7\xc4\xd6", 4, 0, 4, NULL);
+      if (ret != 2)
+        {
+          printf ("4th re_search returned %d\n", ret);
+          result = 1;
+        }
+      re.translate = NULL;
+      regfree (&re);
+    }
+  /* [[:DIGIT:]] must fail to compile; 11 is the full pattern length.  */
+  memset (&re, 0, sizeof (re));
+  re.translate = trans;
+  s = re_compile_pattern ("[[:DIGIT:]]", 11, &re);
+  if (s == NULL)
+    {
+      /* S is NULL here, so there is no error string to print.  */
+      puts ("compilation of \"[[:DIGIT:]]\" pattern unexpectedly succeeded");
+      result = 1;
+    }
+
+  return result;
+}
diff --git a/posix/regex_internal.c b/posix/regex_internal.c index ee38670..ed26908 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -189,7 +189,7 @@ re_string_construct_common (str, len, pstr, trans, icase, dfa) pstr->raw_mbs = (const unsigned char *) str; pstr->len = len; pstr->raw_len = len; - pstr->trans = trans; + pstr->trans = (unsigned RE_TRANSLATE_TYPE) trans; pstr->icase = icase ? 1 : 0; pstr->mbs_allocated = (trans != NULL || icase); pstr->mb_cur_max = dfa->mb_cur_max; @@ -758,7 +758,7 @@ re_string_peek_byte_case (pstr, idx) int ch, off; /* Handle the common (easiest) cases first. */ - if (BE (!pstr->icase, 1)) + if (BE (!pstr->mbs_allocated, 1)) return re_string_peek_byte (pstr, idx); #ifdef RE_ENABLE_I18N @@ -774,8 +774,6 @@ re_string_peek_byte_case (pstr, idx) #endif ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; - if (pstr->trans) - ch = pstr->trans[ch]; #ifdef RE_ENABLE_I18N /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I @@ -793,15 +791,13 @@ static unsigned char re_string_fetch_byte_case (pstr) re_string_t *pstr; { - int ch; - - if (BE (!pstr->icase, 1)) + if (BE (!pstr->mbs_allocated, 1)) return re_string_fetch_byte (pstr); #ifdef RE_ENABLE_I18N if (pstr->offsets_needed) { - int off; + int off, ch; /* For tr_TR.UTF-8 [[:islower:]] there is [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip @@ -815,8 +811,6 @@ re_string_fetch_byte_case (pstr) off = pstr->offsets[pstr->cur_idx]; ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; - if (pstr->trans) - ch = pstr->trans[ch]; if (! 
isascii (ch)) return re_string_fetch_byte (pstr); @@ -827,10 +821,7 @@ re_string_fetch_byte_case (pstr) } #endif - ch = pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; - if (pstr->trans) - ch = pstr->trans[ch]; - return ch; + return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; } static void diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 8f11f89..084028f 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -337,7 +337,7 @@ struct re_string_t the beginning of the input string. */ unsigned int tip_context; /* The translation passed as a part of an argument of re_compile_pattern. */ - RE_TRANSLATE_TYPE trans; + unsigned RE_TRANSLATE_TYPE trans; /* Copy of re_dfa_t's word_char. */ re_const_bitset_ptr_t word_char; /* 1 if REG_ICASE. */ -- 2.7.4