1 /* Pattern Matcher for Fixed String search.
2 Copyright (C) 1992, 1998, 2000, 2005-2006, 2010, 2015 Free Software
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
31 #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
32 /* We can handle multibyte strings. */
43 #define _(str) gettext (str)
45 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
46 # define IN_CTYPE_DOMAIN(c) 1
48 # define IN_CTYPE_DOMAIN(c) isascii(c)
50 #define ISUPPER(C) (IN_CTYPE_DOMAIN (C) && isupper (C))
51 #define TOLOWER(C) (ISUPPER(C) ? tolower(C) : (C))
52 #define ISALNUM(C) (IN_CTYPE_DOMAIN (C) && isalnum (C))
53 #define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
55 #define NCHAR (UCHAR_MAX + 1)
57 struct compiled_kwset {
66 kwsinit (struct compiled_kwset *ckwset,
67 bool match_icase, bool match_words, bool match_lines, char eolbyte)
73 ckwset->trans = XNMALLOC (NCHAR, char);
74 for (i = 0; i < NCHAR; i++)
75 ckwset->trans[i] = TOLOWER (i);
76 ckwset->kwset = kwsalloc (ckwset->trans);
81 ckwset->kwset = kwsalloc (NULL);
83 if (ckwset->kwset == NULL)
84 error (exit_failure, 0, _("memory exhausted"));
85 ckwset->match_words = match_words;
86 ckwset->match_lines = match_lines;
87 ckwset->eolbyte = eolbyte;
91 Fcompile (const char *pattern, size_t pattern_size,
92 bool match_icase, bool match_words, bool match_lines,
95 struct compiled_kwset *ckwset;
99 ckwset = XMALLOC (struct compiled_kwset);
100 kwsinit (ckwset, match_icase, match_words, match_lines, eolbyte);
107 for (lim = beg; lim < pattern + pattern_size && *lim != '\n'; ++lim)
109 if ((err = kwsincr (ckwset->kwset, beg, lim - beg)) != NULL)
110 error (exit_failure, 0, "%s", err);
111 if (lim < pattern + pattern_size)
115 while (beg < pattern + pattern_size);
117 if ((err = kwsprep (ckwset->kwset)) != NULL)
118 error (exit_failure, 0, "%s", err);
123 /* Allocate an array with one entry per byte of "buf", then scan "buf"
124 as a multibyte string. Entries for bytes that are neither a single-byte
125 character nor the first byte of a multibyte character are left zero.
126 The caller must free the array. */
128 check_multibyte_string (const char *buf, size_t buf_size)
130 char *mb_properties = (char *) malloc (buf_size);
134 memset (&cur_state, 0, sizeof (mbstate_t));
135 memset (mb_properties, 0, sizeof (char) * buf_size);
136 for (i = 0; i < buf_size ;)
139 mbclen = mbrlen (buf + i, buf_size - i, &cur_state);
141 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
143 /* An invalid sequence, a truncated multibyte character, or a null
144 character (mbrlen returned 0). We treat it as a single-byte character. */
147 mb_properties[i] = mbclen;
151 return mb_properties;
156 Fexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
157 size_t *match_size, bool exact)
159 struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
160 char eol = ckwset->eolbyte;
161 register const char *buflim = buf + buf_size;
162 register const char *beg;
167 mb_properties = check_multibyte_string (buf, buf_size);
168 #endif /* MBS_SUPPORT */
170 for (beg = buf; beg <= buflim; ++beg)
172 struct kwsmatch kwsmatch;
173 size_t offset = kwsexec (ckwset->kwset, beg, buflim - beg, &kwsmatch);
174 if (offset == (size_t) -1)
178 free (mb_properties);
179 #endif /* MBS_SUPPORT */
183 if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
184 continue; /* It is a part of multibyte character. */
185 #endif /* MBS_SUPPORT */
187 len = kwsmatch.size[0];
193 free (mb_properties);
194 #endif /* MBS_SUPPORT */
197 if (ckwset->match_lines)
199 if (beg > buf && beg[-1] != eol)
201 if (beg + len < buflim && beg[len] != eol)
205 else if (ckwset->match_words)
207 register const char *curr;
208 for (curr = beg; len; )
210 if (curr > buf && IS_WORD_CONSTITUENT ((unsigned char) curr[-1]))
212 if (curr + len < buflim
213 && IS_WORD_CONSTITUENT ((unsigned char) curr[len]))
215 offset = kwsexec (ckwset->kwset, beg, --len, &kwsmatch);
216 if (offset == (size_t) -1)
220 free (mb_properties);
221 #endif /* MBS_SUPPORT */
225 len = kwsmatch.size[0];
237 free (mb_properties);
238 #endif /* MBS_SUPPORT */
243 register const char *end;
245 end = (const char *) memchr (beg + len, eol, buflim - (beg + len));
250 while (buf < beg && beg[-1] != eol)
252 *match_size = end - beg;
255 free (mb_properties);
256 #endif /* MBS_SUPPORT */
262 Ffree (void *compiled_pattern)
264 struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
266 free (ckwset->trans);
270 matcher_t matcher_fgrep =