/* Imported Upstream version 0.7.2
 * [platform/upstream/ltrace.git] / glob.c */
/*
 * This file is part of ltrace.
 * Copyright (C) 2007, 2008, 2012 Petr Machata, Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */
20
#include <sys/types.h>
#include <regex.h>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
26
/*
 * Find the end of a character class such as "[:alpha:]" that appears
 * inside a bracket expression.
 *
 * GLOB is the pattern, LENGTH the number of characters of GLOB that
 * may be examined, and FROM the index of the '[' opening the class.
 * The character at FROM + 1 is not examined here; the caller is
 * expected to have checked that it is ':'.
 *
 * Returns the index of the ']' that follows the terminating ':', or
 * -1 if no such terminator exists within the first LENGTH characters.
 */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
        /* Index of the first character of the class name.  */
        size_t i = from + 2;

        /* Look for the ':' ending the class name.  The character at
         * FROM + 2 itself is never tested, so the name is at least
         * one character long.  Only advance while a further
         * character is still available inside the pattern.  */
        while (i + 1 < length && glob[++i] != ':')
                ;

        /* The closing ']' has to lie inside the pattern as well;
         * never look at or beyond index LENGTH.  */
        if (i + 1 >= length || glob[++i] != ']')
                return -1;
        return (ssize_t)i;
}
38
/*
 * Find the ']' that closes the bracket expression opened at FROM.
 *
 * GLOB is the pattern, LENGTH the number of characters of GLOB that
 * may be examined, and FROM the index of the opening '['.
 *
 * A leading '^' or '!' is skipped as the complement operator.
 * *EXCLMP is set to 1 when that operator was spelled '!', and to 0
 * otherwise (including for '^').  It is left untouched when the
 * pattern ends right after the opening '['.
 *
 * A backslash has no special meaning inside the brackets: apart from
 * the first member and from character classes, the first ']' found
 * ends the expression.
 *
 * Returns the index of the closing ']', or -1 if the expression is
 * not terminated.
 */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
        size_t i = from + 1;

        if (i >= length)
                return -1;

        /* Complement operator.  */
        *exclmp = 0;
        if (glob[i] == '^' || glob[i] == '!') {
                *exclmp = glob[i] == '!';
                if (++i >= length)
                        return -1;
        }

        /* The first member may be any character, including '[' and
         * ']'.  But when '[' is followed by ':', it opens a
         * character class, which is skipped as a whole.  */
        if (glob[i] == '[' && i + 1 < length && glob[i + 1] == ':') {
                ssize_t j = match_character_class(glob, length, i);
                if (j < 0)
                        return -1;
                i = (size_t)j;
        }
        ++i; /* step over the first member */

        for (; i < length; ++i) {
                if (glob[i] == ']')
                        return (ssize_t)i;

                if (glob[i] == '[' && i + 1 < length && glob[i + 1] == ':') {
                        /* Jump to the ']' ending the class; the loop
                         * increment then moves past it, so that ']'
                         * is not taken for the closing bracket.  */
                        ssize_t j = match_character_class(glob, length, i);
                        if (j < 0)
                                return -1;
                        i = (size_t)j;
                }
        }
        return -1;
}
85
/*
 * Append bytes to a growable heap buffer.
 *
 * *BUFP is the buffer (NULL while nothing has been allocated),
 * *SIZEP the number of bytes in use and *ALLOCP the number of bytes
 * allocated.  STR_SIZE bytes of STR are copied to the end of the used
 * part; a STR_SIZE of 0 means "STR is a NUL-terminated string, append
 * strlen(STR) bytes".  No terminator is added.
 *
 * The buffer is grown with realloc() when needed, to twice the size
 * required, and *BUFP, *SIZEP and *ALLOCP are updated.
 *
 * Returns 0 on success.  Returns -1 if the buffer cannot be grown or
 * the resulting size does not fit into size_t; in that case *BUFP,
 * *SIZEP and *ALLOCP are unchanged and *BUFP remains owned by the
 * caller.
 */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
        if (str_size == 0)
                str_size = strlen(str);

        /* Reject a total that would wrap around instead of copying
         * far outside the buffer.  */
        if (str_size > SIZE_MAX - *sizep)
                return -1;
        size_t nsize = *sizep + str_size;

        if (nsize > *allocp) {
                /* Over-allocate so that a run of small appends does
                 * not reallocate every time.  Settle for the exact
                 * size if doubling is not representable.  */
                size_t nalloc = nsize <= SIZE_MAX / 2 ? nsize * 2 : nsize;
                char *nbuf = realloc(*bufp, nalloc);
                if (nbuf == NULL)
                        return -1;
                *allocp = nalloc;
                *bufp = nbuf;
        }

        /* Appending an empty string to a buffer that was never
         * allocated would hand memcpy a null destination.  */
        if (str_size > 0)
                memcpy(*bufp + *sizep, str, str_size);
        *sizep = nsize;
        return 0;
}
106
/*
 * Translate the shell-style pattern GLOB into a regular expression.
 *
 * Unescaped characters are translated as follows:
 *   *        ->  .*
 *   ?        ->  .
 *   .        ->  \.
 *   [...]    ->  copied as is, except that a leading "[!" becomes "[^"
 *   \        ->  escapes the next character
 *   others   ->  copied as is
 *
 * An escaped character stands for itself.  It is emitted with a
 * backslash only if it is one of ". [ \ * ^ $"; any other character
 * is emitted bare, because a backslash in front of it would not
 * reliably denote the literal character (e.g. "\(" and "\{" are
 * operators in POSIX basic regular expressions).  The output is thus
 * written for basic rather than extended regex syntax.
 *
 * On success, stores a malloc'd, NUL-terminated string in *RETP,
 * which the caller must free, and returns REG_NOERROR.  On failure
 * *RETP is left untouched and one of the following is returned:
 *   REG_ESPACE   out of memory
 *   REG_EBRACK   unterminated bracket expression
 *   REG_EESCAPE  GLOB ends with an unescaped backslash
 */
static int
glob_to_regex(const char *glob, char **retp)
{
        size_t allocd = 0;
        size_t size = 0;
        char *buf = NULL;

        /* Every failed append() is an allocation failure, so that is
         * the status reported unless a branch says otherwise.  */
        int status = REG_ESPACE;

        size_t length = strlen(glob);
        int escape = 0;
        size_t i;
        for (i = 0; i < length; ++i) {
                char c = glob[i];

                if (escape) {
                        escape = 0;
                        /* C is never NUL here, so strchr cannot match
                         * the terminator of the set.  */
                        if (strchr(".[\\*^$", c) != NULL) {
                                const char quoted[2] = { '\\', c };
                                if (append(&buf, quoted, sizeof quoted,
                                           &size, &allocd) < 0)
                                        goto fail;
                        } else if (append(&buf, &c, 1,
                                          &size, &allocd) < 0) {
                                goto fail;
                        }
                        continue;
                }

                switch (c) {
                case '\\':
                        escape = 1;
                        break;

                case '[': {
                        int exclm = 0;
                        ssize_t j = match_brack(glob, length, i, &exclm);
                        if (j < 0) {
                                status = REG_EBRACK;
                                goto fail;
                        }

                        /* "[!" is rewritten to "[^"; the remainder of
                         * the expression, up to and including the
                         * closing ']', is copied verbatim.  */
                        size_t skip = exclm ? 2 : 0;
                        if (exclm
                            && append(&buf, "[^", 2, &size, &allocd) < 0)
                                goto fail;
                        if (append(&buf, glob + i + skip,
                                   (size_t)j - i + 1 - skip,
                                   &size, &allocd) < 0)
                                goto fail;
                        i = (size_t)j;
                        break;
                }

                case '*':
                        if (append(&buf, ".*", 0, &size, &allocd) < 0)
                                goto fail;
                        break;

                case '?':
                        if (append(&buf, ".", 0, &size, &allocd) < 0)
                                goto fail;
                        break;

                case '.':
                        if (append(&buf, "\\.", 0, &size, &allocd) < 0)
                                goto fail;
                        break;

                default:
                        if (append(&buf, &c, 1, &size, &allocd) < 0)
                                goto fail;
                        break;
                }
        }

        /* A backslash with nothing after it.  */
        if (escape) {
                status = REG_EESCAPE;
                goto fail;
        }

        /* Terminate the string.  The size has to be given explicitly,
         * as a size of 0 would make append() call strlen().  */
        {
                char nul = 0;
                if (append(&buf, &nul, 1, &size, &allocd) < 0)
                        goto fail;
        }
        *retp = buf;
        return REG_NOERROR;

fail:
        free(buf);
        return status;
}
185
/*
 * Compile the shell-style pattern GLOB into PREG.
 *
 * GLOB is first translated to a regular expression, which is then
 * handed to regcomp() together with CFLAGS.  Returns the status of
 * the translation if that fails (PREG is not touched then), otherwise
 * whatever regcomp() returns.
 */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
        char *translated = NULL;
        int rc = glob_to_regex(glob, &translated);

        if (rc == REG_NOERROR) {
                assert(translated != NULL);
                rc = regcomp(preg, translated, cflags);
                /* The translated pattern is not needed once it has
                 * been through regcomp().  */
                free(translated);
        }
        return rc;
}
198
199 #ifdef TEST
200 #include <stdio.h>
201
/*
 * Test helper: translate GLOB and compare the outcome with what is
 * expected.
 *
 * EXP_STATUS is the status that glob_to_regex() should return, and
 * EXPECT the regular expression it should produce (only looked at
 * when the translation succeeds).  Mismatches are reported on stderr;
 * they do not abort the run.
 */
static void
translate(const char *glob, int exp_status, const char *expect)
{
        char *pattern = NULL;
        int status = glob_to_regex(glob, &pattern);
        if (status != exp_status) {
                fprintf(stderr, "translating %s, expected status %d, got %d\n",
                        glob, exp_status, status);
                /* The translation may have succeeded where a failure
                 * was expected; don't leak its result.  */
                free(pattern);
                return;
        }

        if (status == 0) {
                assert(pattern != NULL);
                if (strcmp(pattern, expect) != 0)
                        fprintf(stderr, "translating %s, expected %s, got %s\n",
                                glob, expect, pattern);
                free(pattern);
        } else {
                /* A failed translation must not hand anything back.  */
                assert(pattern == NULL);
        }
}
223
/*
 * Test helper: compile GLOB with globcomp() and run it against STR.
 *
 * Compilation is asserted to succeed, and regexec() is asserted to
 * return EXPECT (0 for a match, REG_NOMATCH otherwise).
 */
static void
try_match(const char *glob, const char *str, int expect)
{
        regex_t compiled;
        int rc;

        rc = globcomp(&compiled, glob, 0);
        assert(rc == 0);

        /* No sub-match information is requested.  */
        rc = regexec(&compiled, str, 0, NULL, 0);
        assert(rc == expect);

        regfree(&compiled);
}
234
/*
 * Self-test driver: runs the translation cases first, then the
 * matching cases, in the order in which they are listed.
 */
int
main(void)
{
        /* Pattern, expected status, expected regular expression
         * (NULL where the translation is expected to fail).  */
        static const struct {
                const char *glob;
                int status;
                const char *regex;
        } translations[] = {
                { "*", 0, ".*" },
                { "?", 0, "." },
                { ".*", 0, "\\..*" },
                { "*.*", 0, ".*\\..*" },
                { "*a*", 0, ".*a.*" },
                { "[abc]", 0, "[abc]" },
                { "[^abc]", 0, "[^abc]" },
                { "[!abc]", 0, "[^abc]" },
                { "[]]", 0, "[]]" },
                { "[[]", 0, "[[]" },
                { "[^]]", 0, "[^]]" },
                { "[^a-z]", 0, "[^a-z]" },
                { "[abc\\]]", 0, "[abc\\]]" },
                { "[abc\\]def]", 0, "[abc\\]def]" },
                { "[[:space:]]", 0, "[[:space:]]" },
                { "[^[:space:]]", 0, "[^[:space:]]" },
                { "[![:space:]]", 0, "[^[:space:]]" },
                { "[^a-z]*", 0, "[^a-z].*" },
                { "[^a-z]bar*", 0, "[^a-z]bar.*" },
                { "*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
                  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
                  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\." },

                { "\\", REG_EESCAPE, NULL },
                { "[^[:naotuh\\", REG_EBRACK, NULL },
                { "[^[:", REG_EBRACK, NULL },
                { "[^[", REG_EBRACK, NULL },
                { "[^", REG_EBRACK, NULL },
                { "[\\", REG_EBRACK, NULL },
                { "[", REG_EBRACK, NULL },
                { "abc[", REG_EBRACK, NULL },
        };

        /* Pattern, subject string, expected regexec() result.  */
        static const struct {
                const char *glob;
                const char *str;
                int result;
        } matches[] = {
                { "abc*def", "abc012def", 0 },
                { "abc*def", "ab012def", REG_NOMATCH },
                { "[abc]*def", "a1def", 0 },
                { "[abc]*def", "b1def", 0 },
                { "[abc]*def", "d1def", REG_NOMATCH },
        };

        size_t k;

        for (k = 0; k < sizeof translations / sizeof translations[0]; ++k)
                translate(translations[k].glob, translations[k].status,
                          translations[k].regex);

        for (k = 0; k < sizeof matches / sizeof matches[0]; ++k)
                try_match(matches[k].glob, matches[k].str,
                          matches[k].result);

        return 0;
}
278
279 #endif