1 /* Regular expression tests.
2 Copyright (C) 2003 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.  */
21 #include <sys/types.h>
/* Shorthand for the two regex syntaxes named in the first field of each
   test-table entry; do_one_test hands that field to re_set_syntax.  */
29 #define BRE RE_SYNTAX_POSIX_BASIC
30 #define ERE RE_SYNTAX_POSIX_EXTENDED
/* Each entry is { syntax, pattern, string, start, res }: do_one_test
   compiles PATTERN under SYNTAX, searches STRING beginning at offset START
   and compares the re_search result with RES (the expected match offset,
   or -1 when no match is expected).  The characters A B C D ! = ~ are
   also the placeholders that do_mb_tests swaps for multibyte characters.  */
/* BRE: word anchors that must not match from the given start.  */
39 {BRE, "\\<A", "CBAA", 0, -1},
40 {BRE, "\\<A", "CBAA", 2, -1},
41 {BRE, "A\\>", "CAAB", 1, -1},
42 {BRE, "\\bA", "CBAA", 0, -1},
43 {BRE, "\\bA", "CBAA", 2, -1},
44 {BRE, "A\\b", "CAAB", 1, -1},
/* BRE: word anchors that must match.  */
45 {BRE, "\\<A", "AA", 0, 0},
46 {BRE, "\\<A", "C-AA", 2, 2},
47 {BRE, "A\\>", "CAA-", 1, 2},
48 {BRE, "A\\>", "CAA", 1, 2},
49 {BRE, "\\bA", "AA", 0, 0},
50 {BRE, "\\bA", "C-AA", 2, 2},
51 {BRE, "A\\b", "CAA-", 1, 2},
52 {BRE, "A\\b", "CAA", 1, 2},
/* ERE: \b combined with alternation, groups, bracket expressions and
   the dot.  */
53 {ERE, "\\b(A|!|.B)", "A=AC", 0, 0},
54 {ERE, "\\b(A|!|.B)", "=AC", 0, 1},
55 {ERE, "\\b(A|!|.B)", "!AC", 0, 1},
56 {ERE, "\\b(A|!|.B)", "=AB", 0, 1},
57 {ERE, "\\b(A|!|.B)", "DA!C", 0, 2},
58 {ERE, "\\b(A|!|.B)", "=CB", 0, 1},
59 {ERE, "\\b(A|!|.B)", "!CB", 0, 1},
60 {ERE, "\\b(A|!|.B)", "D,B", 0, 1},
61 {ERE, "\\b(A|!|.B)", "!.C", 0, -1},
62 {ERE, "\\b(A|!|.B)", "BCB", 0, -1},
63 {ERE, "(A|\\b)(A|B|C)", "DAAD", 0, 1},
64 {ERE, "(A|\\b)(A|B|C)", "DABD", 0, 1},
65 {ERE, "(A|\\b)(A|B|C)", "AD", 0, 0},
66 {ERE, "(A|\\b)(A|B|C)", "C!", 0, 0},
67 {ERE, "(A|\\b)(A|B|C)", "D,B", 0, 2},
68 {ERE, "(A|\\b)(A|B|C)", "DA?A", 0, 3},
69 {ERE, "(A|\\b)(A|B|C)", "BBC", 0, 0},
70 {ERE, "(A|\\b)(A|B|C)", "DA", 0, -1},
71 {ERE, "(!|\\b)(!|=|~)", "A!=\\", 0, 1},
72 {ERE, "(!|\\b)(!|=|~)", "/!=A", 0, 1},
73 {ERE, "(!|\\b)(!|=|~)", "A=A", 0, 1},
74 {ERE, "(!|\\b)(!|=|~)", "==!=", 0, 2},
75 {ERE, "(!|\\b)(!|=|~)", "==C~", 0, 3},
76 {ERE, "(!|\\b)(!|=|~)", "=~=", 0, -1},
77 {ERE, "(!|\\b)(!|=|~)", "~!", 0, -1},
78 {ERE, "(!|\\b)(!|=|~)", "~=~", 0, -1},
79 {ERE, "(\\b|A.)[ABC]", "AC", 0, 0},
80 {ERE, "(\\b|A.)[ABC]", "=A", 0, 1},
81 {ERE, "(\\b|A.)[ABC]", "DACC", 0, 1},
82 {ERE, "(\\b|A.)[A~C]", "AC", 0, 0},
83 {ERE, "(\\b|A.)[A~C]", "=A", 0, 1},
84 {ERE, "(\\b|A.)[A~C]", "DACC", 0, 1},
85 {ERE, "(\\b|A.)[A~C]", "B!A=", 0, 2},
86 {ERE, "(\\b|A.)[A~C]", "B~C", 0, 1},
87 {ERE, ".\\b.", "AA~", 0, 1},
88 {ERE, ".\\b.", "=A=", 0, 0},
89 {ERE, ".\\b.", "==", 0, -1},
90 {ERE, ".\\b.", "ABA", 0, -1},
/* ERE: the same pattern shapes using \< in place of \b.  */
91 {ERE, "\\<(A|!|.B)", "A=AC", 0, 0},
92 {ERE, "\\<(A|!|.B)", "=AC", 0, 1},
93 {ERE, "\\<(A|!|.B)", "!AC", 0, 1},
94 {ERE, "\\<(A|!|.B)", "=AB", 0, 1},
95 {ERE, "\\<(A|!|.B)", "=CB", 0, 1},
96 {ERE, "\\<(A|!|.B)", "!CB", 0, 1},
97 {ERE, "\\<(A|!|.B)", "DA!C", 0, -1},
98 {ERE, "\\<(A|!|.B)", "D,B", 0, -1},
99 {ERE, "\\<(A|!|.B)", "!.C", 0, -1},
100 {ERE, "\\<(A|!|.B)", "BCB", 0, -1},
101 {ERE, "(A|\\<)(A|B|C)", "DAAD", 0, 1},
102 {ERE, "(A|\\<)(A|B|C)", "DABD", 0, 1},
103 {ERE, "(A|\\<)(A|B|C)", "AD", 0, 0},
104 {ERE, "(A|\\<)(A|B|C)", "C!", 0, 0},
105 {ERE, "(A|\\<)(A|B|C)", "D,B", 0, 2},
106 {ERE, "(A|\\<)(A|B|C)", "DA?A", 0, 3},
107 {ERE, "(A|\\<)(A|B|C)", "BBC", 0, 0},
108 {ERE, "(A|\\<)(A|B|C)", "DA", 0, -1},
109 {ERE, "(!|\\<)(!|=|~)", "A!=\\", 0, 1},
110 {ERE, "(!|\\<)(!|=|~)", "/!=A", 0, 1},
111 {ERE, "(!|\\<)(!|=|~)", "==!=", 0, 2},
112 {ERE, "(!|\\<)(!|=|~)", "==C~", 0, -1},
113 {ERE, "(!|\\<)(!|=|~)", "A=A", 0, -1},
114 {ERE, "(!|\\<)(!|=|~)", "=~=", 0, -1},
115 {ERE, "(!|\\<)(!|=|~)", "~!", 0, -1},
116 {ERE, "(!|\\<)(!|=|~)", "~=~", 0, -1},
117 {ERE, "(\\<|A.)[ABC]", "AC", 0, 0},
118 {ERE, "(\\<|A.)[ABC]", "=A", 0, 1},
119 {ERE, "(\\<|A.)[ABC]", "DACC", 0, 1},
120 {ERE, "(\\<|A.)[A~C]", "AC", 0, 0},
121 {ERE, "(\\<|A.)[A~C]", "=A", 0, 1},
122 {ERE, "(\\<|A.)[A~C]", "DACC", 0, 1},
123 {ERE, "(\\<|A.)[A~C]", "B!A=", 0, 2},
124 {ERE, "(\\<|A.)[A~C]", "B~C", 0, 2},
125 {ERE, ".\\<.", "=A=", 0, 0},
126 {ERE, ".\\<.", "AA~", 0, -1},
127 {ERE, ".\\<.", "==", 0, -1},
128 {ERE, ".\\<.", "ABA", 0, -1},
/* ERE: \B on its own and alternated with \b.  */
129 {ERE, ".\\B.", "ABA", 0, 0},
130 {ERE, ".\\B.", "=BDC", 0, 1},
131 {ERE, ".(\\b|\\B).", "=~AB", 0, 1},
132 {ERE, ".(\\b|\\B).", "A=C", 0, 0},
133 {ERE, ".(\\b|\\B).", "ABC", 0, 0},
134 {ERE, ".(\\b|\\B).", "=~\\!", 0, -1},
/* Run one table entry: compile TEST->pattern under TEST->syntax, search
   TEST->string from TEST->start and compare the result with TEST->res.
   Every diagnostic is prefixed with FAIL so the caller can label the run.
   NOTE(review): the return type, braces, remaining declarations, the tails
   of some calls and the return statements are not visible in this view.  */
138 do_one_test (const struct test_s *test, const char *fail)
142 struct re_pattern_buffer regbuf;
/* The syntax is selected before the pattern is compiled.  */
144 re_set_syntax (test->syntax);
/* Start from an all-zero pattern buffer.
   NOTE(review): `®buf' here and in the re_search calls below looks like a
   mis-encoded `&regbuf' (the leading "&reg" rendered as the registered
   sign); it is not a valid C token -- confirm against the pristine file.  */
145 memset (®buf, '\0', sizeof (regbuf));
146 err = re_compile_pattern (test->pattern, strlen (test->pattern),
/* Diagnostic for a pattern that did not compile.  */
150 printf ("%sre_compile_pattern \"%s\" failed: %s\n", fail, test->pattern,
/* Search forward from TEST->start across the rest of the string; NULL is
   passed as the last argument.  */
155 res = re_search (®buf, test->string, strlen (test->string),
156 test->start, strlen (test->string) - test->start, NULL);
157 if (res != test->res)
159 printf ("%sre_search \"%s\" \"%s\" failed: %d (expected %d)\n",
160 fail, test->pattern, test->string, res, test->res);
/* When a match is expected at a non-zero offset and the first search
   began at 0, search again starting exactly at the expected offset; the
   result must be that same offset.  */
165 if (test->res > 0 && test->start == 0)
167 res = re_search (®buf, test->string, strlen (test->string),
168 test->res, strlen (test->string) - test->res, NULL);
169 if (res != test->res)
171 printf ("%sre_search from expected \"%s\" \"%s\" failed: %d (expected %d)\n",
172 fail, test->pattern, test->string, res, test->res);
/* Write at P the UTF-8 bytes of the multibyte character that stands in for
   the ASCII placeholder C; each case advances P past the bytes it stores
   (two, three or four of them).
   NOTE(review): the return type, the switch header and the return
   statement are not visible in this view; do_mb_tests treats the result
   as the advanced write pointer.  */
183 replace (char *p, char c)
/* A -> U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS */
188 case 'A': *p++ = '\xc3'; *p++ = '\x84'; break;
/* B -> U+00D6 LATIN CAPITAL LETTER O WITH DIAERESIS */
190 case 'B': *p++ = '\xc3'; *p++ = '\x96'; break;
/* C -> U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS */
192 case 'C': *p++ = '\xc3'; *p++ = '\x9c'; break;
/* D -> U+00E4 LATIN SMALL LETTER A WITH DIAERESIS */
194 case 'D': *p++ = '\xc3'; *p++ = '\xa4'; break;
195 /* ! -> U+00D7 MULTIPLICATION SIGN */
196 case '!': *p++ = '\xc3'; *p++ = '\x97'; break;
/* = -> U+2014 EM DASH */
198 case '=': *p++ = '\xe2'; *p++ = '\x80'; *p++ = '\x94'; break;
199 /* ~ -> U+1D15E MUSICAL SYMBOL HALF NOTE */
200 case '~': *p++ = '\xf0'; *p++ = '\x9d'; *p++ = '\x85'; *p++ = '\x9e';
/* Derive multibyte variants of TEST by substituting placeholder characters
   in its pattern and string with the UTF-8 sequences produced by replace,
   then run each variant through do_one_test.
   NOTE(review): the declarations of i, j, p, repl and t, the braces and
   several conditions are not visible in this view; the comments below
   describe only what the visible lines show.  */
207 do_mb_tests (const struct test_s *test)
/* The seven placeholders that replace knows how to convert.  */
211 const char *const chars = "ABCD!=~";
/* Worst case every input byte becomes a four-byte sequence (the '~' case
   of replace), plus the terminating NUL.  */
213 char pattern[strlen (test->pattern) * 4 + 1];
214 char string[strlen (test->string) * 4 + 1];
/* Label buffer: the "UTF-8 " prefix followed by the contents of repl.  */
215 char fail[8 + sizeof ("UTF-8 ")];
219 strcpy (fail, "UTF-8 ");
/* 127 iterations over 7 placeholders -- presumably i is a bitmask choosing
   which of them to replace; the selecting test is not visible (TODO
   confirm).  */
220 for (i = 1; i < 128; ++i)
223 for (j = 0; j < 7; ++j)
/* True when chars[j] occurs in neither the pattern nor the string; the
   action taken in that case is not visible here.  */
226 if (!strchr (test->pattern, chars[j])
227 && !strchr (test->string, chars[j]))
/* Build the converted pattern: characters listed in repl are expanded by
   replace, a backslash and the character it escapes are copied as a pair
   (so the escaped character is never expanded), everything else is copied
   unchanged.  */
235 for (j = 0, p = pattern; test->pattern[j]; ++j)
236 if (strchr (repl, test->pattern[j]))
237 p = replace (p, test->pattern[j]);
238 else if (test->pattern[j] == '\\' && test->pattern[j + 1])
240 *p++ = test->pattern[j++];
241 *p++ = test->pattern[j];
244 *p++ = test->pattern[j];
247 t.start = test->start;
/* Build the converted string the same way (without the backslash rule).  */
250 for (j = 0, p = string; test->string[j]; ++j)
251 if (strchr (repl, test->string[j]))
253 char *d = replace (p, test->string[j]);
/* d - p is the byte length of the replacement, so this adds the extra
   bytes beyond the single original one to the start offset.  The guard
   deciding when the adjustment applies is not visible -- presumably only
   for characters before the original start (TODO confirm).  */
255 t.start += d - p - 1;
261 *p++ = test->string[j];
/* Append the replaced characters to the label after the "UTF-8 " prefix.  */
264 p = stpcpy (fail + strlen ("UTF-8 "), repl);
268 if (do_one_test (&t, fail))
/* Run every table entry under two locales: first de_DE.ISO-8859-1, then
   de_DE.UTF-8, accumulating failures in ret.
   NOTE(review): the enclosing function header, the braces and whatever
   follows a failed setlocale are not visible in this view.  */
282 for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
284 if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
286 puts ("setlocale de_DE.ISO-8859-1 failed");
/* Single-byte locale run; the empty string means no label prefix.  */
289 ret |= do_one_test (&tests[i], "");
290 if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
292 puts ("setlocale de_DE.UTF-8 failed");
/* Same entry, unmodified, under the UTF-8 locale.  */
295 ret |= do_one_test (&tests[i], "UTF-8 ");
296 // Until the implementation is fixed, ignore the results of the multibyte tests: do_mb_tests still runs, but its return value is deliberately not folded into ret.
298 /* ret |= */do_mb_tests (&tests[i]);