1 /* Pattern Matchers for Regular Expressions.
2 Copyright (C) 1992, 1998, 2000, 2005-2006, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* IN_CTYPE_DOMAIN(c) guards the <ctype.h>-style classifier used below: it is
   the constant 1 when STDC_HEADERS is defined (or when neither an isascii
   macro nor HAVE_ISASCII is available), and isascii(c) otherwise.
   NOTE(review): the #else / #endif that separate and close the two
   definitions are not visible in this excerpt -- confirm they are present.  */
34 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
35 # define IN_CTYPE_DOMAIN(c) 1
37 # define IN_CTYPE_DOMAIN(c) isascii(c)
/* ISALNUM(C): isalnum (C), but only for values accepted by IN_CTYPE_DOMAIN.
   The argument may be evaluated more than once, so it must be free of side
   effects.  */
39 #define ISALNUM(C) (IN_CTYPE_DOMAIN (C) && isalnum (C))
/* IS_WORD_CONSTITUENT(C): nonzero when C is alphanumeric or an underscore.
   Callers in this file cast the byte to unsigned char before passing it.
   Like ISALNUM, this evaluates its argument more than once.  */
40 #define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
/* NOTE(review): elsewhere in this file these two objects are accessed as
   patterns[i].regexbuf and patterns[i].regs, so they are presumably members
   of struct patterns whose opening and closing lines are not shown here.  */
44 /* Compiled form of one pattern; filled in by re_compile_pattern and then
      handed to re_search / re_match.  */
45 struct re_pattern_buffer regexbuf;
/* Match registers filled in by re_search / re_match; end[0] is read back to
   compute the length of a match.  */
46 struct re_registers regs; /* This is here on account of a BRAIN-DEAD
47 Q@#%!# library interface in regex.c. */
/* State built by compile and consumed by EGexecute and EGfree.
   NOTE(review): only the patterns member is visible in this excerpt; other
   code in this file also reads or writes members named pcount, match_words,
   match_lines and eolbyte, whose declarations are on lines not shown.  */
50 struct compiled_regex {
55 /* Array of compiled patterns.  compile grows it by one element per
      sub-pattern with xrealloc and indexes the new slot with pcount.  */
56 struct patterns *patterns;
/* Compile the newline-separated patterns in PATTERN (PATTERN_SIZE bytes)
   into a freshly allocated struct compiled_regex.

   MATCH_WORDS, MATCH_LINES and EOLBYTE are stored in the new object for
   later use.  A regex syntax value named `syntax' is passed to
   re_set_syntax; it is presumably the final parameter, declared on a line
   not shown in this excerpt.  A compilation error reported by
   re_compile_pattern is passed to error () with status exit_failure.

   NOTE(review): MATCH_ICASE is not referenced in the visible lines, and the
   return statement is not shown; the wrappers in this file return the result
   of this function directly.  */
61 compile (const char *pattern, size_t pattern_size,
62 bool match_icase, bool match_words, bool match_lines, char eolbyte,
65 struct compiled_regex *cregex;
/* Allocate the result and zero it before filling in the individual fields.  */
67 cregex = XMALLOC (struct compiled_regex);
68 memset (cregex, '\0', sizeof (struct compiled_regex));
69 cregex->match_words = match_words;
70 cregex->match_lines = match_lines;
71 cregex->eolbyte = eolbyte;
72 cregex->patterns = NULL;
/* Select the regex dialect before any pattern is compiled.  */
75 re_set_syntax (syntax);
77 /* For GNU regex compiler we have to pass the patterns separately to detect
78 errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]";
79 GNU regex should have raised a syntax error. The same for backref, where
80 the backref should have been local to each pattern. */
/* TOTAL counts the bytes still to be processed and MOTIF points at the start
   of the current sub-pattern.  */
83 size_t total = pattern_size;
84 const char *motif = pattern;
/* Locate the newline, if any, that terminates the current sub-pattern.  */
91 sep = (const char *) memchr (motif, '\n', total);
/* Grow the pattern array by one slot and zero the new slot before compiling
   into it.  */
104 cregex->patterns = xrealloc (cregex->patterns, (cregex->pcount + 1) * sizeof (struct patterns));
105 memset (&cregex->patterns[cregex->pcount], '\0', sizeof (struct patterns));
/* re_compile_pattern returns NULL on success, or an error message that is
   reported through error ().  LEN is computed on a line not shown.  */
107 if ((err = re_compile_pattern (motif, len,
108 &cregex->patterns[cregex->pcount].regexbuf)) != NULL)
109 error (exit_failure, 0, "%s", err);
/* Keep going while a separator was found and input remains.  */
114 while (sep && total != 0);
/* Compile PATTERN with the syntax RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE.
   Every other argument is forwarded unchanged to compile, whose result is
   returned.  */
121 Gcompile (const char *pattern, size_t pattern_size,
122 bool match_icase, bool match_words, bool match_lines, char eolbyte)
124 return compile (pattern, pattern_size,
125 match_icase, match_words, match_lines, eolbyte,
126 RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
/* Compile PATTERN with the syntax RE_SYNTAX_POSIX_EGREP.  Every other
   argument is forwarded unchanged to compile, whose result is returned.  */
130 Ecompile (const char *pattern, size_t pattern_size,
131 bool match_icase, bool match_words, bool match_lines, char eolbyte)
133 return compile (pattern, pattern_size,
134 match_icase, match_words, match_lines, eolbyte,
135 RE_SYNTAX_POSIX_EGREP);
/* Compile PATTERN by forwarding all arguments to compile and returning its
   result.  NOTE(review): the syntax argument that completes the call is on a
   line not shown in this excerpt.  */
139 AWKcompile (const char *pattern, size_t pattern_size,
140 bool match_icase, bool match_words, bool match_lines, char eolbyte)
142 return compile (pattern, pattern_size,
143 match_icase, match_words, match_lines, eolbyte,
/* Search BUF (BUF_SIZE bytes) for a match of any pattern held in
   COMPILED_PATTERN, which must point to a struct compiled_regex.

   The buffer is walked one line at a time, lines being delimited by the
   stored eolbyte, and each compiled pattern is tried on each line with
   re_search.  When match_lines is set, a match is compared against the
   length of the whole line; otherwise, when match_words is set, the match is
   checked against word boundaries, retrying shorter matches and later start
   positions.  The visible code stores end - beg in *MATCH_SIZE.

   NOTE(review): the return statements, the use of EXACT and several loop
   headers are on lines not shown in this excerpt.  */
148 EGexecute (const void *compiled_pattern,
149 const char *buf, size_t buf_size,
150 size_t *match_size, bool exact)
152 struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern;
153 char eol = cregex->eolbyte;
154 register const char *buflim = buf + buf_size;
155 register const char *beg;
156 register const char *end;
/* BEG and END bracket the current line; each iteration resumes at the
   previous END.  */
158 for (beg = buf; beg < buflim; beg = end)
/* Find the end-of-line byte for the line starting at BEG.  */
162 end = (const char *) memchr (beg, eol, buflim - beg);
165 /* Here, either end < buflim && *end == eol, or end == buflim. */
/* Try every compiled pattern against the current line.  */
167 for (i = 0; i < cregex->pcount; i++)
/* Clear not_eol for the full-line search; it is set to 1 further down before
   a shorter match is retried.  */
171 cregex->patterns[i].regexbuf.not_eol = 0;
172 if (0 <= (start = re_search (&cregex->patterns[i].regexbuf, beg,
174 end - beg, &cregex->patterns[i].regs)))
/* Length of the match: end offset of register 0 minus the start offset.  */
176 len = cregex->patterns[i].regs.end[0] - start;
182 if (cregex->match_lines)
184 if (len == end - beg) /* implies start == 0 */
187 else if (cregex->match_words)
189 /* If -w, check if the match aligns with word boundaries.
190 We do this iteratively because:
191 (a) the line may contain more than one occurrence of the pattern, and
193 (b) Several alternatives in the pattern might be valid at
194 a given point, and we may need to consider a shorter
195 one to find a word boundary. */
/* The match is word-aligned when the byte before it (if any) and the byte
   after it (if any) are not word constituents.  */
198 if ((start == 0 || !IS_WORD_CONSTITUENT ((unsigned char) beg[start - 1]))
199 && (start + len == end - beg
200 || !IS_WORD_CONSTITUENT ((unsigned char) beg[start + len])))
204 /* Try a shorter length anchored at the same place. */
/* not_eol is set before the retry; in GNU regex this flag stops `$' from
   matching at the end of the searched string.  The remaining re_match
   arguments are on a line not shown.  */
206 cregex->patterns[i].regexbuf.not_eol = 1;
207 len = re_match (&cregex->patterns[i].regexbuf, beg,
209 &cregex->patterns[i].regs);
213 /* Try looking further on. */
/* A match starting at the very end of the line leaves nothing further to
   search.  */
214 if (start == end - beg)
/* Restore not_eol and search again within the rest of the line.  */
217 cregex->patterns[i].regexbuf.not_eol = 0;
218 start = re_search (&cregex->patterns[i].regexbuf, beg,
220 start, end - beg - start,
221 &cregex->patterns[i].regs);
/* NOTE(review): LEN is recomputed from the registers without a visible check
   that START is non-negative; confirm the enclosing loop tests START before
   LEN is used.  */
222 len = cregex->patterns[i].regs.end[0] - start;
/* Report the distance from BEG to END through MATCH_SIZE.  */
237 *match_size = end - beg;
/* Release the pattern array owned by COMPILED_PATTERN, which must point to a
   struct compiled_regex.
   NOTE(review): in the visible lines only the patterns array itself is
   freed; the individual compiled regex buffers and the compiled_regex object
   are not released here -- confirm whether that is intentional.  */
242 EGfree (void *compiled_pattern)
244 struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern;
246 free (cregex->patterns);
250 /* Matcher descriptor for POSIX Basic Regular Expressions.  The
       initializer is on lines not shown in this excerpt.  */
251 matcher_t matcher_grep =
258 /* Matcher descriptor for POSIX Extended Regular Expressions.  The
       initializer is on lines not shown in this excerpt.  */
259 matcher_t matcher_egrep =
266 /* Matcher descriptor for AWK Regular Expressions.  The initializer is on
       lines not shown in this excerpt.  */
267 matcher_t matcher_awk =