1 /* GPattern copy that supports raw (non-utf8) matching
2 * based on: GLIB - Library of useful routines for C programming
3 * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
24 #include "patternspec.h"
/* Pattern classification constants.  pattern_spec_new() stores one of them
 * in pspec->match_type and pattern_match() switches on that field.  The
 * trailing comment on each constant shows an example pattern of that shape;
 * '*' and '?' are the special pattern characters, and 'A' presumably stands
 * for any literal character.
 * NOTE(review): the enclosing enum declaration is not visible in this
 * listing. */
29 MATCH_ALL, /* "*A?A*" */
30 MATCH_ALL_TAIL, /* "*A?AA" */
31 MATCH_HEAD, /* "AAAA*" */
32 MATCH_TAIL, /* "*AAAA" */
33 MATCH_EXACT, /* "AAAAA" */
/* Returns a newly allocated, byte-wise reversed copy of at most the first
 * `size` bytes of `str`: g_strndup() makes the copy and g_strreverse()
 * reverses that copy in place and returns it.  The caller owns the result
 * and releases it with g_free().  Called from the MATCH_MODE_RAW branch of
 * pattern_spec_new().
 * NOTE(review): the return-type line and the braces are not visible in this
 * listing. */
48 raw_strreverse (const gchar * str, gssize size)
51 return g_strreverse (g_strndup (str, size));
/* Matches match_string against match_pattern, both walked from their start.
 *
 * match_pattern: pattern text in which '*' and '?' are treated specially.
 * match_mode: when MATCH_MODE_UTF8, the string cursor advances one UTF-8
 *   character at a time through g_utf8_next_char().
 *   NOTE(review): the non-UTF-8 branch is not visible here; presumably it
 *   advances a single byte.
 * match_string: text to test.
 * wildcard_reached_p: out flag that the body sets to TRUE at one point;
 *   the recursive call below reads it back through next_wildcard_reached.
 *
 * NOTE(review): only fragments of the body are visible in this listing
 * (no braces, no return statements, no switch/case lines); the comments
 * below describe the visible statements only. */
54 static inline gboolean
55 pattern_ph_match (const gchar * match_pattern, MatchMode match_mode,
56 const gchar * match_string, gboolean * wildcard_reached_p)
58 register const gchar *pattern, *string;
/* work on local cursors initialised from the two arguments */
61 pattern = match_pattern;
62 string = match_string;
/* UTF-8 mode: step over a whole character rather than a single byte */
71 if (match_mode == MATCH_MODE_UTF8)
72 string = g_utf8_next_char (string);
/* report to the caller that a wildcard was reached */
78 *wildcard_reached_p = TRUE;
85 if (match_mode == MATCH_MODE_UTF8)
86 string = g_utf8_next_char (string);
/* loop condition: keep going while the pattern character is '*' or '?' */
91 while (ch == '*' || ch == '?');
95 gboolean next_wildcard_reached = FALSE;
/* advance through the string until its current character equals ch */
96 while (ch != *string) {
99 if (match_mode == MATCH_MODE_UTF8)
100 string = g_utf8_next_char (string);
/* recurse on the current pattern/string cursors; the callee reports through
 * next_wildcard_reached whether it got as far as a wildcard */
105 if (pattern_ph_match (pattern, match_mode, string,
106 &next_wildcard_reached))
108 if (next_wildcard_reached)
109 /* the forthcoming pattern substring up to the next wildcard has
110 * been matched, but a mismatch occurred for the rest of the
111 * pattern, following the next wildcard.
112 * there's no need to advance the current match position any
113 * further if the rest of the pattern will not match.
/* Tests `string` against the compiled pattern held in pspec.
 *
 * pspec: compiled pattern; must not be NULL (asserted).
 * string_length: length of string in bytes; it is compared with
 *   pspec->pattern_length and passed to g_utf8_validate() and memcmp().
 * string: text to match; must not be NULL (asserted).
 * string_reversed: reversed form of string, handed to pattern_ph_match() in
 *   one of the switch cases.  pattern_match_string() passes NULL here.
 *   NOTE(review): the check that selects between string_reversed and the
 *   locally built reversed copy is not visible in this listing.
 *
 * NOTE(review): the return-type line, the braces, the case labels and
 * several statements are missing from this listing; the comments below
 * describe the visible lines only. */
136 pattern_match (PatternSpec * pspec, guint string_length,
137 const gchar * string, const gchar * string_reversed)
139 MatchMode match_mode;
141 g_assert (pspec != NULL);
142 g_assert (string != NULL);
/* length outside the [min_length, max_length] range stored in pspec;
 * NOTE(review): the action taken is not visible, presumably return FALSE */
144 if (string_length < pspec->min_length || string_length > pspec->max_length)
/* resolve MATCH_MODE_AUTO for this particular string: a string that fails
 * g_utf8_validate() is matched in raw mode, UTF-8 mode is the alternative
 * (the `else` line itself is not visible here) */
147 match_mode = pspec->match_mode;
148 if (match_mode == MATCH_MODE_AUTO) {
149 if (!g_utf8_validate (string, string_length, NULL))
150 match_mode = MATCH_MODE_RAW;
152 match_mode = MATCH_MODE_UTF8;
155 switch (pspec->match_type) {
/* match the pattern against the string as given */
158 return pattern_ph_match (pspec->pattern, match_mode, string, &dummy);
/* match the pattern against the caller-supplied reversed string */
161 return pattern_ph_match (pspec->pattern, match_mode, string_reversed,
/* build a temporary reversed copy with the reversal routine that fits the
 * matching mode, then match the pattern against that copy */
166 if (match_mode == MATCH_MODE_UTF8) {
167 tmp = g_utf8_strreverse (string, string_length);
169 tmp = raw_strreverse (string, string_length);
171 result = pattern_ph_match (pspec->pattern, match_mode, tmp, &dummy);
/* equal lengths: compare all bytes; otherwise, for a non-empty pattern,
 * compare only the first pattern_length bytes of the string */
176 if (pspec->pattern_length == string_length)
177 return memcmp (pspec->pattern, string, string_length) == 0;
178 else if (pspec->pattern_length)
179 return memcmp (pspec->pattern, string, pspec->pattern_length) == 0;
/* non-empty pattern: compare it with the last pattern_length bytes of the
 * string */
183 if (pspec->pattern_length)
184 /* compare incl. NUL terminator */
185 return memcmp (pspec->pattern,
186 string + (string_length - pspec->pattern_length),
187 pspec->pattern_length + 1) == 0;
/* lengths are checked first (the action on mismatch is not visible here),
 * then the bytes are compared */
191 if (pspec->pattern_length != string_length)
194 return memcmp (pspec->pattern, string, string_length) == 0;
/* guard against an out-of-range match_type: when the check fails this
 * reports the failure and returns FALSE */
196 g_return_val_if_fail (pspec->match_type < MATCH_LAST, FALSE);
/* Compiles `pattern` into a newly allocated PatternSpec.
 *
 * pattern: NUL-terminated pattern text; must not be NULL (asserted).  When
 *   match_mode is MATCH_MODE_UTF8 it must also be valid UTF-8 (asserted).
 * match_mode: requested matching mode.  With MATCH_MODE_AUTO, a pattern
 *   that is not valid UTF-8 switches the spec to MATCH_MODE_RAW.
 *
 * NOTE(review): the return-type line, the return statement, the braces and
 * a number of statements are missing from this listing; presumably the new
 * pspec is returned and later released with pattern_spec_free().  The
 * comments below describe the visible lines only. */
202 pattern_spec_new (const gchar * pattern, MatchMode match_mode)
205 gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
/* position trackers start at -1, which later reads as "not seen" */
206 gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
207 gboolean follows_wildcard = FALSE;
208 guint pending_jokers = 0;
213 g_assert (pattern != NULL);
214 g_assert (match_mode != MATCH_MODE_UTF8
215 || g_utf8_validate (pattern, -1, NULL));
217 /* canonicalize pattern and collect necessary stats */
218 pspec = g_new (PatternSpec, 1);
219 pspec->match_mode = match_mode;
220 pspec->pattern_length = strlen (pattern);
221 pspec->min_length = 0;
222 pspec->max_length = 0;
/* buffer sized for the input pattern plus its NUL terminator */
223 pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
/* auto mode: fall back to raw matching when the pattern itself is not
 * valid UTF-8 */
225 if (pspec->match_mode == MATCH_MODE_AUTO) {
226 if (!g_utf8_validate (pattern, -1, NULL))
227 pspec->match_mode = MATCH_MODE_RAW;
/* walk the input pattern one byte at a time */
231 for (i = 0, s = pattern; *s != 0; s++) {
234 if (follows_wildcard) { /* compress multiple wildcards */
235 pspec->pattern_length--;
238 follows_wildcard = TRUE;
/* grow the upper bound on the byte length of a matching string */
246 if (pspec->match_mode == MATCH_MODE_RAW) {
247 pspec->max_length += 1;
249 pspec->max_length += 4; /* maximum UTF-8 character length */
/* drain the jokers that were counted but not yet handled */
253 for (; pending_jokers; pending_jokers--, i++) {
259 follows_wildcard = FALSE;
267 for (; pending_jokers; pending_jokers--) {
/* summarise the recorded positions: a position >= 0 means that kind of
 * character was seen; differing hw_pos/tw_pos presumably means wildcards at
 * more than one position */
274 seen_joker = hj_pos >= 0;
275 seen_wildcard = hw_pos >= 0;
276 more_wildcards = seen_wildcard && hw_pos != tw_pos;
/* NOTE(review): the guarding condition is not visible here; presumably a
 * wildcard makes the maximum match length unbounded */
278 pspec->max_length = G_MAXUINT;
280 /* special case sole head/tail wildcard or exact matches */
281 if (!seen_joker && !more_wildcards) {
/* leading '*': drop it by shifting the rest left, leaving a suffix to
 * compare */
282 if (pspec->pattern[0] == '*') {
283 pspec->match_type = MATCH_TAIL;
284 memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
285 pspec->pattern[pspec->pattern_length] = 0;
/* trailing '*': cut it off, leaving a prefix to compare */
288 if (pspec->pattern_length > 0 &&
289 pspec->pattern[pspec->pattern_length - 1] == '*') {
290 pspec->match_type = MATCH_HEAD;
291 pspec->pattern[--pspec->pattern_length] = 0;
/* no wildcard at all: the whole string must equal the pattern */
294 if (!seen_wildcard) {
295 pspec->match_type = MATCH_EXACT;
300 /* now just need to distinguish between head or tail match start */
301 tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */
302 tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */
/* a tail distance larger than the corresponding head position selects
 * MATCH_ALL_TAIL, otherwise MATCH_ALL */
304 pspec->match_type = tw_pos > hw_pos ? MATCH_ALL_TAIL : MATCH_ALL;
305 else /* seen_joker */
306 pspec->match_type = tj_pos > hj_pos ? MATCH_ALL_TAIL : MATCH_ALL;
/* MATCH_ALL_TAIL patterns are reversed with the routine that fits the
 * spec's mode; the previous buffer stays reachable through tmp.
 * NOTE(review): the release of tmp is not visible in this listing. */
307 if (pspec->match_type == MATCH_ALL_TAIL) {
308 gchar *tmp = pspec->pattern;
310 if (pspec->match_mode == MATCH_MODE_RAW) {
311 pspec->pattern = raw_strreverse (pspec->pattern, pspec->pattern_length);
314 g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
/* Releases a PatternSpec.  pspec must not be NULL (asserted).  The visible
 * body frees the pattern buffer.
 * NOTE(review): the statement freeing pspec itself is not visible in this
 * listing; confirm it is present in the full file. */
322 pattern_spec_free (PatternSpec * pspec)
324 g_assert (pspec != NULL);
326 g_free (pspec->pattern);
/* Convenience wrapper: matches the NUL-terminated `string` against pspec by
 * calling pattern_match() with strlen (string) as the length and NULL for
 * the pre-reversed copy.  Returns pattern_match()'s result. */
331 pattern_match_string (PatternSpec * pspec, const gchar * string)
333 return pattern_match (pspec, strlen (string), string, NULL);