1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc.
4 * SPDX-License-Identifier: LGPL-2.1-or-later
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "gmessages.h"
29 #include "gstrfuncs.h"
35 * @title: Glob-style pattern matching
36 * @short_description: matches strings against patterns containing '*'
37 * (wildcard) and '?' (joker)
39 * The g_pattern_match* functions match a string
40 * against a pattern containing '*' and '?' wildcards with similar
41 * semantics as the standard glob() function: '*' matches an arbitrary,
42 * possibly empty, string, '?' matches an arbitrary character.
44 * Note that in contrast to glob(), the '/' character can be matched by
45 * the wildcards, there are no '[...]' character ranges and '*' and '?'
46 * can not be escaped to include them literally in a pattern.
48 * When multiple strings must be matched against the same pattern, it
49 * is better to compile the pattern to a #GPatternSpec using
50 * g_pattern_spec_new() and use g_pattern_spec_match_string() instead of
51 * g_pattern_match_simple(). This avoids the overhead of repeated
52 * pattern compilation.
58 * A GPatternSpec struct is the 'compiled' form of a pattern. This
59 * structure is opaque and its fields cannot be accessed directly.
62 /* keep enum and structure of gpattern.c and patterntest.c in sync */
/* Matching strategies; g_pattern_spec_new() picks one per compiled pattern
 * and g_pattern_spec_match() switches on it. The trailing comment on each
 * member shows a sample pattern of that kind. HEAD, TAIL and EXACT are the
 * special cases for a sole trailing wildcard, a sole leading wildcard and
 * no placeholder at all; the ALL variants handle every other pattern, with
 * ALL_TAIL working on the reversed pattern and the reversed string.
 */
65 G_MATCH_ALL, /* "*A?A*" */
66 G_MATCH_ALL_TAIL, /* "*A?AA" */
67 G_MATCH_HEAD, /* "AAAA*" */
68 G_MATCH_TAIL, /* "*AAAA" */
69 G_MATCH_EXACT, /* "AAAAA" */
/* strategy that g_pattern_spec_match() switches on; assigned by g_pattern_spec_new() */
75 GMatchType match_type;
83 /* --- functions --- */
/* Placeholder matcher: compares match_string against match_pattern, which
 * may contain '*' and '?' placeholders, and calls itself on the remaining
 * pattern/string pair once a run of placeholders has been consumed.
 * wildcard_reached_p is an out-flag that this function sets to TRUE; the
 * recursive call is handed its own flag so that this level can tell whether
 * the sub-match got as far as the next wildcard.
 */
84 static inline gboolean
85 g_pattern_ph_match (const gchar *match_pattern,
86 const gchar *match_string,
87 gboolean *wildcard_reached_p)
89 const gchar *pattern, *string;
/* walk local cursors; the two arguments themselves are left untouched */
92 pattern = match_pattern;
93 string = match_string;
/* the string cursor moves by one whole UTF-8 character at a time */
104 string = g_utf8_next_char (string);
/* tell the caller a wildcard was reached (presumably on '*', confirm) */
108 *wildcard_reached_p = TRUE;
117 string = g_utf8_next_char (string);
/* keep going while the pattern still shows '*' or '?' placeholders */
120 while (ch == '*' || ch == '?');
125 gboolean next_wildcard_reached = FALSE;
/* advance through the string until its current byte equals ch */
126 while (ch != *string)
130 string = g_utf8_next_char (string);
/* candidate position: try to match the rest of the pattern from here */
133 if (g_pattern_ph_match (pattern, string, &next_wildcard_reached))
135 if (next_wildcard_reached)
136 /* the forthcoming pattern substring up to the next wildcard has
137 * been matched, but a mismatch occurred for the rest of the
138 * pattern, following the next wildcard.
139 * there's no need to advance the current match position any
140 * further if the rest pattern will not match.
163 * g_pattern_spec_match:
164 * @pspec: a #GPatternSpec
165 * @string_length: the length of @string (in bytes, i.e. strlen(),
166 * not g_utf8_strlen())
167 * @string: the UTF-8 encoded string to match
168 * @string_reversed: (nullable): the reverse of @string or %NULL
170 * Matches a string against a compiled pattern. Passing the correct
171 * length of the string given is mandatory. The reversed string can be
172 * omitted by passing %NULL; this is more efficient if the reversed
173 * version of the string to be matched is not at hand, as
174 * g_pattern_spec_match() will only construct it if the compiled pattern
175 * requires reverse matches.
177 * Note that, if the user code will (possibly) match a string against a
178 * multitude of patterns containing wildcards, chances are high that
179 * some patterns will require a reversed string. In this case, it's
180 * more efficient to provide the reversed string to avoid multiple
181 * constructions thereof in the various calls to g_pattern_spec_match().
183 * Note also that the reverse of a UTF-8 encoded string can in general
184 * not be obtained by g_strreverse(). This works only if the string
185 * does not contain any multibyte characters. GLib offers the
186 * g_utf8_strreverse() function to reverse UTF-8 encoded strings.
188 * Returns: %TRUE if @string matches @pspec
/* Validates the arguments, applies a length pre-check, then dispatches on
 * pspec->match_type to either the placeholder matcher or a plain
 * strcmp()/strncmp() comparison.
 */
193 g_pattern_spec_match (GPatternSpec *pspec,
196 const gchar *string_reversed)
198 g_return_val_if_fail (pspec != NULL, FALSE);
199 g_return_val_if_fail (string != NULL, FALSE);
/* length pre-check against the bounds that g_pattern_spec_new() stored */
201 if (string_length < pspec->min_length ||
202 string_length > pspec->max_length)
205 switch (pspec->match_type)
/* forward match with the placeholder matcher */
209 return g_pattern_ph_match (pspec->pattern, string, &dummy);
210 case G_MATCH_ALL_TAIL:
/* the stored pattern is reversed for this type, so match the reversed string */
212 return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy);
/* reversed copy built locally; presumably the fallback taken when
 * string_reversed is NULL (confirm against the surrounding branch) */
217 tmp = g_utf8_strreverse (string, string_length);
218 result = g_pattern_ph_match (pspec->pattern, tmp, &dummy);
/* prefix comparison: equal lengths need a full compare, otherwise only
 * the first pattern_length bytes of string are compared */
223 if (pspec->pattern_length == string_length)
224 return strcmp (pspec->pattern, string) == 0;
225 else if (pspec->pattern_length)
226 return strncmp (pspec->pattern, string, pspec->pattern_length) == 0;
/* suffix comparison: compare against the last pattern_length bytes of string */
230 if (pspec->pattern_length)
231 return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0;
/* exact comparison: byte lengths are checked before the contents */
235 if (pspec->pattern_length != string_length)
238 return strcmp (pspec->pattern, string) == 0;
/* guard for a match_type at or beyond G_MATCH_LAST */
240 g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE);
246 * g_pattern_match: (skip)
247 * @pspec: a #GPatternSpec
248 * @string_length: the length of @string (in bytes, i.e. strlen(),
249 * not g_utf8_strlen())
250 * @string: the UTF-8 encoded string to match
251 * @string_reversed: (nullable): the reverse of @string or %NULL
253 * Matches a string against a compiled pattern. Passing the correct
254 * length of the string given is mandatory. The reversed string can be
255 * omitted by passing %NULL, this is more efficient if the reversed
256 * version of the string to be matched is not at hand, as
257 * g_pattern_match() will only construct it if the compiled pattern
258 * requires reverse matches.
260 * Note that, if the user code will (possibly) match a string against a
261 * multitude of patterns containing wildcards, chances are high that
262 * some patterns will require a reversed string. In this case, it's
263 * more efficient to provide the reversed string to avoid multiple
264 * constructions thereof in the various calls to g_pattern_match().
266 * Note also that the reverse of a UTF-8 encoded string can in general
267 * not be obtained by g_strreverse(). This works only if the string
268 * does not contain any multibyte characters. GLib offers the
269 * g_utf8_strreverse() function to reverse UTF-8 encoded strings.
271 * Returns: %TRUE if @string matches @pspec
272 * Deprecated: 2.70: Use g_pattern_spec_match() instead
/* Deprecated entry point: forwards every argument unchanged to
 * g_pattern_spec_match() and returns its result.
 */
275 g_pattern_match (GPatternSpec *pspec,
278 const gchar *string_reversed)
280 return g_pattern_spec_match (pspec, string_length, string, string_reversed);
284 * g_pattern_spec_new:
285 * @pattern: a zero-terminated UTF-8 encoded string
287 * Compiles a pattern to a #GPatternSpec.
289 * Returns: a newly-allocated #GPatternSpec
/* Compiles @pattern: copies it into a canonical form held by a newly
 * allocated GPatternSpec, records the length bounds that
 * g_pattern_spec_match() uses for its pre-check, and selects the GMatchType
 * that g_pattern_spec_match() will dispatch on.
 */
292 g_pattern_spec_new (const gchar *pattern)
295 gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
/* positions start at -1, i.e. "not seen"; the >= 0 tests below rely on that.
 * Presumably hw/tw are the first/last wildcard and hj/tj the first/last
 * joker position (confirm where they are assigned). */
296 gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
297 gboolean follows_wildcard = FALSE;
298 guint pending_jokers = 0;
303 g_return_val_if_fail (pattern != NULL, NULL);
305 /* canonicalize pattern and collect necessary stats */
306 pspec = g_new (GPatternSpec, 1);
/* NOTE(review): strlen() yields a size_t; confirm that the type of
 * pattern_length can hold it for very long patterns */
307 pspec->pattern_length = strlen (pattern);
308 pspec->min_length = 0;
309 pspec->max_length = 0;
/* output buffer sized for the input length plus the terminating NUL */
310 pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
/* single pass over the bytes of the input pattern */
312 for (i = 0, s = pattern; *s != 0; s++)
317 if (follows_wildcard) /* compress multiple wildcards */
/* a dropped duplicate wildcard shortens the canonical pattern by one */
319 pspec->pattern_length--;
322 follows_wildcard = TRUE;
330 pspec->max_length += 4; /* maximum UTF-8 character length */
/* drain the joker counter, advancing the output index once per joker */
333 for (; pending_jokers; pending_jokers--, i++) {
339 follows_wildcard = FALSE;
/* drain whatever jokers are still pending after the main loop */
347 for (; pending_jokers; pending_jokers--) {
/* derive the summary flags from the recorded positions */
354 seen_joker = hj_pos >= 0;
355 seen_wildcard = hw_pos >= 0;
356 more_wildcards = seen_wildcard && hw_pos != tw_pos;
/* lifts the upper length bound; presumably only when a wildcard was seen
 * (confirm the guarding condition) */
358 pspec->max_length = G_MAXUINT;
360 /* special case sole head/tail wildcard or exact matches */
361 if (!seen_joker && !more_wildcards)
363 if (pspec->pattern[0] == '*')
365 pspec->match_type = G_MATCH_TAIL;
/* strip the leading wildcard so only the literal suffix is stored */
366 memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
367 pspec->pattern[pspec->pattern_length] = 0;
370 if (pspec->pattern_length > 0 &&
371 pspec->pattern[pspec->pattern_length - 1] == '*')
373 pspec->match_type = G_MATCH_HEAD;
/* strip the trailing wildcard so only the literal prefix is stored */
374 pspec->pattern[--pspec->pattern_length] = 0;
379 pspec->match_type = G_MATCH_EXACT;
384 /* now just need to distinguish between head or tail match start */
385 tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */
386 tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */
/* choose ALL_TAIL when the tail-side distance exceeds the head-side position */
388 pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
389 else /* seen_joker */
390 pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
/* tail-first matching runs on reversed text, so store the pattern reversed;
 * tmp keeps the old buffer (presumably so it can be released, confirm) */
391 if (pspec->match_type == G_MATCH_ALL_TAIL) {
392 gchar *tmp = pspec->pattern;
393 pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
400 * g_pattern_spec_copy:
401 * @pspec: a #GPatternSpec
403 * Copies @pspec into a new #GPatternSpec.
405 * Returns: (transfer full): a copy of @pspec.
/* Allocates a second GPatternSpec, copies every field of @pspec by struct
 * assignment, then replaces the copied pattern pointer with a fresh
 * duplicate so the two specs do not share one string buffer.
 */
410 g_pattern_spec_copy (GPatternSpec *pspec)
412 GPatternSpec *pspec_copy;
414 g_return_val_if_fail (pspec != NULL, NULL);
416 pspec_copy = g_new (GPatternSpec, 1);
/* shallow copy first; the pattern pointer is fixed up on the next line */
417 *pspec_copy = *pspec;
418 pspec_copy->pattern = g_strndup (pspec->pattern, pspec->pattern_length);
424 * g_pattern_spec_free:
425 * @pspec: a #GPatternSpec
427 * Frees the memory allocated for the #GPatternSpec.
/* Releases the pattern string held by @pspec; a NULL @pspec is rejected by
 * the precondition check instead of being silently accepted.
 */
430 g_pattern_spec_free (GPatternSpec *pspec)
432 g_return_if_fail (pspec != NULL);
434 g_free (pspec->pattern);
439 * g_pattern_spec_equal:
440 * @pspec1: a #GPatternSpec
441 * @pspec2: another #GPatternSpec
443 * Compares two compiled pattern specs and returns whether they will
444 * match the same set of strings.
446 * Returns: Whether the compiled patterns are equal
/* Two compiled specs are considered equal when their canonical pattern
 * length, match type and canonical pattern text all agree. The length
 * bounds are not compared here.
 */
449 g_pattern_spec_equal (GPatternSpec *pspec1,
450 GPatternSpec *pspec2)
452 g_return_val_if_fail (pspec1 != NULL, FALSE);
453 g_return_val_if_fail (pspec2 != NULL, FALSE);
/* cheap integer comparisons first, the string comparison last */
455 return (pspec1->pattern_length == pspec2->pattern_length &&
456 pspec1->match_type == pspec2->match_type &&
457 strcmp (pspec1->pattern, pspec2->pattern) == 0);
461 * g_pattern_spec_match_string:
462 * @pspec: a #GPatternSpec
463 * @string: the UTF-8 encoded string to match
465 * Matches a string against a compiled pattern. If the string is to be
466 * matched against more than one pattern, consider using
467 * g_pattern_spec_match() instead while supplying the reversed string.
469 * Returns: %TRUE if @string matches @pspec
/* Convenience wrapper around g_pattern_spec_match(): measures @string with
 * strlen() and passes NULL for the reversed string.
 */
474 g_pattern_spec_match_string (GPatternSpec *pspec,
477 g_return_val_if_fail (pspec != NULL, FALSE);
478 g_return_val_if_fail (string != NULL, FALSE);
480 return g_pattern_spec_match (pspec, strlen (string), string, NULL);
484 * g_pattern_match_string: (skip)
485 * @pspec: a #GPatternSpec
486 * @string: the UTF-8 encoded string to match
488 * Matches a string against a compiled pattern. If the string is to be
489 * matched against more than one pattern, consider using
490 * g_pattern_match() instead while supplying the reversed string.
492 * Returns: %TRUE if @string matches @pspec
493 * Deprecated: 2.70: Use g_pattern_spec_match_string() instead
/* Deprecated entry point: forwards to g_pattern_spec_match_string() and
 * returns its result.
 */
496 g_pattern_match_string (GPatternSpec *pspec,
499 return g_pattern_spec_match_string (pspec, string);
503 * g_pattern_match_simple:
504 * @pattern: the UTF-8 encoded pattern
505 * @string: the UTF-8 encoded string to match
507 * Matches a string against a pattern given as a string. If this
508 * function is to be called in a loop, it's more efficient to compile
509 * the pattern once with g_pattern_spec_new() and call
510 * g_pattern_spec_match_string() repeatedly.
512 * Returns: %TRUE if @string matches @pattern
/* One-shot match: compiles @pattern into a temporary GPatternSpec, matches
 * @string against it once, and frees the spec again before finishing.
 */
515 g_pattern_match_simple (const gchar *pattern,
521 g_return_val_if_fail (pattern != NULL, FALSE);
522 g_return_val_if_fail (string != NULL, FALSE);
524 pspec = g_pattern_spec_new (pattern);
/* no reversed string is supplied, hence the NULL */
525 ergo = g_pattern_spec_match (pspec, strlen (string), string, NULL);
526 g_pattern_spec_free (pspec);
526 g_pattern_spec_free (pspec);