4 * Copyright (C) 2007-2015 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
31 static const struct string empty_pattern_string = {
32 .ref = REF_MAX, .str = (char *) "()"
35 static const struct string *const empty_pattern = &empty_pattern_string;
37 char *regexp_escape(const struct regexp *r) {
48 /* Use a range with from > to to force conversion of ranges into
50 ret = fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
51 &nre, &nre_len, 2, 1);
53 pat = escape(nre, nre_len, RX_ESCAPES);
59 pat = escape(r->pattern->str, -1, RX_ESCAPES);
64 /* Remove unneeded '()' from pat */
65 for (int changed = 1; changed;) {
67 for (char *p = pat; *p != '\0'; p++) {
68 if (*p == '(' && p[1] == ')') {
69 memmove(p, p+2, strlen(p+2)+1);
75 if (pat[0] == '(' && pat[strlen(pat)-1] == ')') {
77 for (int i=1; i < strlen(pat)-1; i++) {
86 memmove(pat, pat+1, strlen(pat+1)+1);
87 pat[strlen(pat)-1] = '\0';
94 void print_regexp(FILE *out, struct regexp *r) {
96 fprintf(out, "<NULL>");
101 if (r->pattern == NULL)
102 fprintf(out, "%p", r);
106 fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
108 print_chars(out, rx, rx_len);
117 make_regexp_unescape(struct info *info, const char *pat, int nocase) {
118 char *p = unescape(pat, strlen(pat), NULL);
122 return make_regexp(info, p, nocase);
125 struct regexp *make_regexp(struct info *info, char *pat, int nocase) {
126 struct regexp *regexp;
129 regexp->info = ref(info);
131 make_ref(regexp->pattern);
132 regexp->pattern->str = pat;
133 regexp->nocase = nocase;
137 /* Take a POSIX glob and turn it into a regexp. The regexp is constructed
138 * by doing the following translations of characters in the string:
141 * leave characters escaped with a backslash alone
142 * escape any of ".|{}()+^$" with a backslash
144 * Note that that ignores some of the finer points of globs, like
147 struct regexp *make_regexp_from_glob(struct info *info, const char *glob) {
148 static const char *const star = "[^/]*";
149 static const char *const qmark = "[^/]";
150 static const char *const special = ".|{}()+^$";
151 int newlen = strlen(glob);
154 for (const char *s = glob; *s; s++) {
155 if (*s == '\\' && *(s+1))
158 newlen += strlen(star)-1;
160 newlen += strlen(qmark)-1;
161 else if (strchr(special, *s) != NULL)
165 if (ALLOC_N(pat, newlen + 1) < 0)
169 for (const char *s = glob; *s; s++) {
170 if (*s == '\\' && *(s+1)) {
173 } else if (*s == '*') {
175 } else if (*s == '?') {
176 t = stpcpy(t, qmark);
177 } else if (strchr(special, *s) != NULL) {
185 return make_regexp(info, pat, 0);
188 void free_regexp(struct regexp *regexp) {
191 assert(regexp->ref == 0);
192 unref(regexp->info, info);
193 unref(regexp->pattern, string);
194 if (regexp->re != NULL) {
201 int regexp_is_empty_pattern(struct regexp *r) {
202 for (char *s = r->pattern->str; *s; s++) {
203 if (*s != '(' && *s != ')')
209 struct regexp *make_regexp_literal(struct info *info, const char *text) {
212 /* Escape special characters in text since it should be taken
214 CALLOC(pattern, 2*strlen(text)+1);
216 for (const char *t = text; *t != '\0'; t++) {
217 if ((*t == '\\') && t[1]) {
220 } else if (strchr(".|{}[]()+*?", *t) != NULL) {
227 return make_regexp(info, pattern, 0);
231 regexp_union(struct info *info, struct regexp *r1, struct regexp *r2) {
236 return regexp_union_n(info, 2, r);
239 char *regexp_expand_nocase(struct regexp *r) {
240 const char *p = r->pattern->str, *t;
244 int psub = 0, rsub = 0;
249 ret = fa_expand_nocase(p, strlen(p), &s, &len);
250 ERR_NOMEM(ret == REG_ESPACE, r->info);
251 BUG_ON(ret != REG_NOERROR, r->info, NULL);
253 /* Make sure that r->pattern->str and ret have the same number
254 * of parentheses/groups, since our parser critically depends
255 * on the fact that the regexp for a union/concat and those
256 * of its children have groups that are in direct relation */
257 for (t = p; *t; t++) if (*t == '(') psub += 1;
258 for (t = s; *t; t++) if (*t == '(') rsub += 1;
259 BUG_ON(psub < rsub, r->info, NULL);
262 char *adjusted = NULL, *a;
263 if (ALLOC_N(adjusted, strlen(s) + 2*psub + 1) < 0)
264 ERR_NOMEM(true, r->info);
266 for (int i=0; i < psub; i++) *a++ = '(';
268 for (int i=0; i < psub; i++) *a++ = ')';
276 static char *append_expanded(struct regexp *r, char **pat, char *p,
278 char *expanded = NULL;
279 size_t ofs = p - *pat;
282 expanded = regexp_expand_nocase(r);
285 *len += strlen(expanded) - strlen(r->pattern->str);
287 ret = REALLOC_N(*pat, *len);
288 ERR_NOMEM(ret < 0, r->info);
290 p = stpcpy(*pat + ofs, expanded);
297 regexp_union_n(struct info *info, int n, struct regexp **r) {
299 char *pat = NULL, *p, *expanded = NULL;
300 int nnocase = 0, npresent = 0;
302 for (int i=0; i < n; i++)
304 len += strlen(r[i]->pattern->str) + strlen("()|");
310 bool mixedcase = nnocase > 0 && nnocase < npresent;
315 if (ALLOC_N(pat, len) < 0)
320 for (int i=0; i < n; i++) {
326 if (mixedcase && r[i]->nocase) {
327 p = append_expanded(r[i], &pat, p, &len);
328 ERR_BAIL(r[i]->info);
330 p = stpcpy(p, r[i]->pattern->str);
336 return make_regexp(info, pat, nnocase == npresent);
344 regexp_concat(struct info *info, struct regexp *r1, struct regexp *r2) {
349 return regexp_concat_n(info, 2, r);
353 regexp_concat_n(struct info *info, int n, struct regexp **r) {
355 char *pat = NULL, *p, *expanded = NULL;
356 int nnocase = 0, npresent = 0;
358 for (int i=0; i < n; i++)
360 len += strlen(r[i]->pattern->str) + strlen("()");
366 bool mixedcase = nnocase > 0 && nnocase < npresent;
372 if (ALLOC_N(pat, len) < 0)
376 for (int i=0; i < n; i++) {
380 if (mixedcase && r[i]->nocase) {
381 p = append_expanded(r[i], &pat, p, &len);
382 ERR_BAIL(r[i]->info);
384 p = stpcpy(p, r[i]->pattern->str);
389 return make_regexp(info, pat, nnocase == npresent);
396 static struct fa *regexp_to_fa(struct regexp *r) {
397 const char *p = r->pattern->str;
399 struct fa *fa = NULL;
401 ret = fa_compile(p, strlen(p), &fa);
402 ERR_NOMEM(ret == REG_ESPACE, r->info);
403 BUG_ON(ret != REG_NOERROR, r->info, NULL);
407 ERR_NOMEM(ret < 0, r->info);
417 regexp_minus(struct info *info, struct regexp *r1, struct regexp *r2) {
418 struct regexp *result = NULL;
419 struct fa *fa = NULL, *fa1 = NULL, *fa2 = NULL;
424 fa1 = regexp_to_fa(r1);
427 fa2 = regexp_to_fa(r2);
430 fa = fa_minus(fa1, fa2);
434 r = fa_as_regexp(fa, &s, &s_len);
439 /* FA is the empty set, which we can't represent as a regexp */
443 if (regexp_c_locale(&s, NULL) < 0)
446 result = make_regexp(info, s, fa_is_nocase(fa));
456 unref(result, regexp);
462 regexp_iter(struct info *info, struct regexp *r, int min, int max) {
471 if ((min == 0 || min == 1) && max == -1) {
472 char q = (min == 0) ? '*' : '+';
473 ret = asprintf(&s, "(%s)%c", p, q);
474 } else if (min == max) {
475 ret = asprintf(&s, "(%s){%d}", p, min);
477 ret = asprintf(&s, "(%s){%d,%d}", p, min, max);
479 return (ret == -1) ? NULL : make_regexp(info, s, r->nocase);
483 regexp_maybe(struct info *info, struct regexp *r) {
491 ret = asprintf(&s, "(%s)?", p);
492 return (ret == -1) ? NULL : make_regexp(info, s, r->nocase);
495 struct regexp *regexp_make_empty(struct info *info) {
496 struct regexp *regexp;
499 if (regexp != NULL) {
500 regexp->info = ref(info);
501 /* Casting away the CONST for EMPTY_PATTERN is ok since it
502 is protected against changes because REF == REF_MAX */
503 regexp->pattern = (struct string *) empty_pattern;
509 static int regexp_compile_internal(struct regexp *r, const char **c) {
510 /* See the GNU regex manual or regex.h in gnulib for
511 * an explanation of these flags. They are set so that the regex
512 * matcher interprets regular expressions the same way that libfa
515 static const reg_syntax_t syntax =
516 RE_CONTEXT_INDEP_OPS|RE_CONTEXT_INVALID_OPS|RE_DOT_NOT_NULL
517 |RE_INTERVALS|RE_NO_BK_BRACES|RE_NO_BK_PARENS|RE_NO_BK_REFS
518 |RE_NO_BK_VBAR|RE_NO_EMPTY_RANGES
519 |RE_NO_POSIX_BACKTRACKING|RE_CONTEXT_INVALID_DUP|RE_NO_GNU_OPS;
520 reg_syntax_t old_syntax = re_syntax_options;
527 re_syntax_options = syntax;
529 re_syntax_options |= RE_ICASE;
530 *c = re_compile_pattern(r->pattern->str, strlen(r->pattern->str), r->re);
531 re_syntax_options = old_syntax;
533 r->re->regs_allocated = REGS_REALLOCATE;
539 int regexp_compile(struct regexp *r) {
542 return regexp_compile_internal(r, &c);
545 int regexp_check(struct regexp *r, const char **msg) {
546 return regexp_compile_internal(r, msg);
549 int regexp_match(struct regexp *r,
550 const char *string, const int size,
551 const int start, struct re_registers *regs) {
553 if (regexp_compile(r) == -1)
556 return re_match(r->re, string, size, start, regs);
559 int regexp_matches_empty(struct regexp *r) {
560 return regexp_match(r, "", 0, 0, NULL) == 0;
563 int regexp_nsub(struct regexp *r) {
565 if (regexp_compile(r) == -1)
567 return r->re->re_nsub;
570 void regexp_release(struct regexp *regexp) {
571 if (regexp != NULL && regexp->re != NULL) {
579 * indent-tabs-mode: nil