1 /* regexprops.c -- document the properties of the regular expressions
4 Copyright 2005, 2007, 2010 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 The output of this program is included in the GNU findutils source
23 distribution. The copying conditions for that file are generated
24 by the copying() function below.
27 /* Written by James Youngman, <jay@gnu.org>. */
38 #include "regextype.h"
/* Lowest-level emitter used by this file.  Visible call sites pass a
   Texinfo fragment as S (for example "@menu\n") and 0 as ESCAPE.
   NOTE(review): the body is not part of this extract, so where S is
   written and how ESCAPE is used could not be confirmed.  */
43 output (const char *s, int escape)
/* Emit S as ordinary document text.  Callers in this file pass running
   sentences, some of which already contain Texinfo markup.
   NOTE(review): body not visible here; presumably forwards to output()
   -- confirm.  */
58 content (const char *s)
/* Emit S as document text.  Callers in this file use it for sentences
   that embed @samp{...} markup, including backslashes and braces.
   NOTE(review): body not visible here; how it differs from content()
   could not be confirmed.  */
64 literal (const char *s)
/* Emit S, a Texinfo @-command.  Callers in this file pass strings such
   as "@node ", "@subsection ", "@table " and "@end table".
   NOTE(review): body not visible here.  */
70 directive (const char *s)
/* Takes a string S.  NOTE(review): neither the body nor any call site is
   visible in this extract; presumably emits S as a Texinfo comment --
   confirm against the full file.  */
76 comment (const char *s)
/* Emit one list entry whose text is S.  Callers in this file pass the
   wording of a single bullet, e.g. "After a newline".
   NOTE(review): body not visible here; the exact Texinfo item markup
   produced could not be confirmed.  */
84 enum_item (const char *s)
/* Start the documentation subsection for one regular expression type.
   NAME is the first parameter; the remaining parameters are declared on
   lines not shown in this extract (the caller in describe_all passes
   name, next, previous and up).  */
93 begin_subsection (const char *name,
/* Node line; its text ends with " regular expression syntax".  */
104 directive ("@node ");
106 content (" regular expression syntax");
/* Subsection heading, which opens an @samp{ group before the same
   " regular expression syntax" suffix.  */
109 directive ("@subsection ");
110 output ("@samp{", 0);
113 content (" regular expression syntax");
/* Open a Texinfo table by emitting the "@table " command.  MARKUP is the
   formatting command for the table's items; the one visible caller
   passes "@samp".  NOTE(review): the line that emits MARKUP itself is
   not part of this extract.  */
118 begintable_markup (char const *markup)
121 directive ("@table ");
130 directive ("@end table");
138 directive ("@enumerate");
146 directive ("@end enumerate");
158 describe_regex_syntax (int options)
161 content ("The character @samp{.} matches any single character");
162 if ( (options & RE_DOT_NEWLINE) == 0 )
164 content (" except newline");
166 if (options & RE_DOT_NOT_NULL)
168 if ( (options & RE_DOT_NEWLINE) == 0 )
173 content (" the null character");
178 if (!(options & RE_LIMITED_OPS))
180 begintable_markup ("@samp");
181 if (options & RE_BK_PLUS_QM)
184 content ("indicates that the regular expression should match one"
185 " or more occurrences of the previous atom or regexp. ");
187 content ("indicates that the regular expression should match zero"
188 " or one occurrence of the previous atom or regexp. ");
189 enum_item ("+ and ? ");
190 content ("match themselves. ");
195 content ("indicates that the regular expression should match one"
196 " or more occurrences of the previous atom or regexp. ");
198 content ("indicates that the regular expression should match zero"
199 " or one occurrence of the previous atom or regexp. ");
201 literal ("matches a @samp{+}");
203 literal ("matches a @samp{?}. ");
210 content ("Bracket expressions are used to match ranges of characters. ");
211 literal ("Bracket expressions where the range is backward, for example @samp{[z-a]}, are ");
212 if (options & RE_NO_EMPTY_RANGES)
218 if (options & RE_BACKSLASH_ESCAPE_IN_LISTS)
219 literal ("Within square brackets, @samp{\\} can be used to quote "
220 "the following character. ");
222 literal ("Within square brackets, @samp{\\} is taken literally. ");
224 if (options & RE_CHAR_CLASSES)
225 content ("Character classes are supported; for example "
226 "@samp{[[:digit:]]} will match a single decimal digit. ");
228 literal ("Character classes are not supported, so for example "
229 "you would need to use @samp{[0-9]} "
230 "instead of @samp{[[:digit:]]}. ");
232 if (options & RE_HAT_LISTS_NOT_NEWLINE)
234 literal ("Non-matching lists @samp{[^@dots{}]} do not ever match newline. ");
237 if (options & RE_NO_GNU_OPS)
239 content ("GNU extensions are not supported and so "
240 "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
242 "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively. ");
246 content ("GNU extensions are supported:");
248 enum_item ("@samp{\\w} matches a character within a word");
249 enum_item ("@samp{\\W} matches a character which is not within a word");
250 enum_item ("@samp{\\<} matches the beginning of a word");
251 enum_item ("@samp{\\>} matches the end of a word");
252 enum_item ("@samp{\\b} matches a word boundary");
253 enum_item ("@samp{\\B} matches characters which are not a word boundary");
254 enum_item ("@samp{\\`} matches the beginning of the whole input");
255 enum_item ("@samp{\\'} matches the end of the whole input");
262 if (options & RE_NO_BK_PARENS)
264 literal ("Grouping is performed with parentheses @samp{()}. ");
266 if (options & RE_UNMATCHED_RIGHT_PAREN_ORD)
267 literal ("An unmatched @samp{)} matches just itself. ");
271 literal ("Grouping is performed with backslashes followed by parentheses @samp{\\(}, @samp{\\)}. ");
274 if (options & RE_NO_BK_REFS)
276 content ("A backslash followed by a digit matches that digit. ");
280 literal ("A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis ");
281 if (options & RE_NO_BK_PARENS)
282 literal ("@samp{(}");
284 literal ("@samp{\\(}");
290 if (!(options & RE_LIMITED_OPS))
292 if (options & RE_NO_BK_VBAR)
293 literal ("The alternation operator is @samp{|}. ");
295 literal ("The alternation operator is @samp{\\|}. ");
299 if (options & RE_CONTEXT_INDEP_ANCHORS)
301 literal ("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified. ");
305 literal ("The character @samp{^} only represents the beginning of a string when it appears:");
307 enum_item ("\nAt the beginning of a regular expression");
308 enum_item ("After an open-group, signified by ");
309 if (options & RE_NO_BK_PARENS)
311 literal ("@samp{(}");
315 literal ("@samp{\\(}");
318 if (!(options & RE_LIMITED_OPS))
320 if (options & RE_NEWLINE_ALT)
321 enum_item ("After a newline");
323 if (options & RE_NO_BK_VBAR )
324 enum_item ("After the alternation operator @samp{|}");
326 enum_item ("After the alternation operator @samp{\\|}");
331 literal ("The character @samp{$} only represents the end of a string when it appears:");
333 enum_item ("At the end of a regular expression");
334 enum_item ("Before a close-group, signified by ");
335 if (options & RE_NO_BK_PARENS)
337 literal ("@samp{)}");
341 literal ("@samp{\\)}");
343 if (!(options & RE_LIMITED_OPS))
345 if (options & RE_NEWLINE_ALT)
346 enum_item ("Before a newline");
348 if (options & RE_NO_BK_VBAR)
349 enum_item ("Before the alternation operator @samp{|}");
351 enum_item ("Before the alternation operator @samp{\\|}");
356 if (!(options & RE_LIMITED_OPS) )
358 if ((options & RE_CONTEXT_INDEP_OPS)
359 && !(options & RE_CONTEXT_INVALID_OPS))
361 literal ("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression. ");
365 if (options & RE_BK_PLUS_QM)
366 literal ("@samp{\\*}, @samp{\\+} and @samp{\\?} ");
368 literal ("@samp{*}, @samp{+} and @samp{?} ");
370 if (options & RE_CONTEXT_INVALID_OPS)
372 content ("are special at any point in a regular expression except the following places, where they are not allowed:");
376 content ("are special at any point in a regular expression except:");
380 enum_item ("At the beginning of a regular expression");
381 enum_item ("After an open-group, signified by ");
382 if (options & RE_NO_BK_PARENS)
384 literal ("@samp{(}");
388 literal ("@samp{\\(}");
390 if (!(options & RE_LIMITED_OPS))
392 if (options & RE_NEWLINE_ALT)
393 enum_item ("After a newline");
395 if (options & RE_NO_BK_VBAR)
396 enum_item ("After the alternation operator @samp{|}");
398 enum_item ("After the alternation operator @samp{\\|}");
406 if (options & RE_INTERVALS)
408 if (options & RE_NO_BK_BRACES)
410 literal ("Intervals are specified by @samp{@{} and @samp{@}}. ");
411 if (options & RE_INVALID_INTERVAL_ORD)
413 literal ("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
417 literal ("Invalid intervals such as @samp{a@{1z} are not accepted. ");
422 literal ("Intervals are specified by @samp{\\@{} and @samp{\\@}}. ");
423 if (options & RE_INVALID_INTERVAL_ORD)
425 literal ("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}");
429 literal ("Invalid intervals such as @samp{a\\@{1z} are not accepted. ");
436 if (options & RE_NO_POSIX_BACKTRACKING)
438 content ("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match. ");
442 content ("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups. ");
/* Copyright and permission notice (GNU Free Documentation License,
   Version 1.3 or later), stored as an array of strings.  The elements
   are separated by leading commas, so the copyright years are split
   across the first two strings.  NOTE(review): the array terminator is
   not part of this extract.  */
451 static const char *copy_para[]=
453 "Copyright (C) 1994, 1996, 1998, 2000, 2001, 2003, 2004, 2005, 2006,"
454 ,"2007, 2009, 2010 Free Software Foundation, Inc."
456 ,"Permission is granted to copy, distribute and/or modify this document"
457 ,"under the terms of the GNU Free Documentation License, Version 1.3 or"
458 ,"any later version published by the Free Software Foundation; with no"
459 ,"Invariant Sections, with no Front-Cover Texts, and with no Back-Cover"
460 ,"Texts. A copy of the license is included in the ``GNU Free"
461 ,"Documentation License'' file as part of this distribution."
465 const char **s = copy_para;
/* Return nonzero when regex type number IX should be left out for
   CONTEXT, i.e. when the context mask reported for IX by
   get_regex_type_context() has no bit in common with CONTEXT.  */
471 ignore (int ix, const unsigned int context)
473 return 0 == (get_regex_type_context (ix) & context);
/* Emit a Texinfo menu, bracketed by "@menu" and "@end menu", covering the
   regex types that apply to CONTEXT.  */
477 menu (unsigned int context)
482 output ("@menu\n", 0);
/* Fetch the flags and name of regex type I.  NOTE(review): the enclosing
   loop header is not part of this extract.  */
484 options = get_regex_type_flags (i),
485 name=get_regex_type_name (i);
/* Only types that are not ignored for CONTEXT produce an entry.  */
488 if (!ignore (i, context))
492 content (" regular expression syntax");
497 output ("@end menu\n", 0);
/* Starting at index IX, look for the first regex type that is not
   ignored for CONTEXT and take its name.  The scan continues for as long
   as get_regex_type_name() returns a non-null name.
   NOTE(review): the return statements are not part of this extract.  */
503 get_next (unsigned int ix, unsigned int context)
506 while (get_regex_type_name (ix))
508 if (!ignore (ix, context))
510 next = get_regex_type_name (ix);
/* Emit the description of every regex type that applies to CONTEXT.
   CONTEXTNAME is the human-readable name of CONTEXT and is written into
   a leading "@c" comment line.  NOTE(review): the remaining parameter
   (UP, as used in the begin_subsection call) is declared on a line not
   shown in this extract.  */
523 describe_all (const char *contextname,
524 unsigned int context,
527 const char *name, *next, *previous;
533 literal ("@c this regular expression description is for: ");
534 literal (contextname);
/* Fetch the flags and name of regex type I.  NOTE(review): the enclosing
   loop header is not part of this extract.  */
542 options = get_regex_type_flags (i),
543 name=get_regex_type_name (i);
/* Types that do not apply to CONTEXT are reported and skipped.  */
546 if (ignore (i, context))
549 "Skipping regexp type %s for context %s\n",
/* NEXT is the name of the following non-ignored type, used for node
   linkage in the subsection header.  */
555 next = get_next (i+1, context);
558 begin_subsection (name, next, previous, up);
/* A type that is a synonym only gets a cross-reference to its parent;
   otherwise the full syntax description is generated.  */
559 parent = get_regex_type_synonym (i);
562 content ("This is a synonym for ");
563 content (get_regex_type_name (parent));
568 describe_regex_syntax (options);
/* Program entry point.  Sets the program name (from argv[0], or the
   fallback "regexprops"), optionally selects a context by name from
   argv[2] ("findutils", "generic" or "all"; the default is "all"), and
   then generates the descriptions with describe_all().  An unknown
   context name is reported on stderr as a complete, newline-terminated
   line.  */
577 main (int argc, char *argv[])
580 unsigned int context = CONTEXT_ALL;
581 const char *contextname = "all";
584 set_program_name (argv[0]);
586 set_program_name ("regexprops");
/* Map the context name given on the command line to its CONTEXT_* mask.  */
594 contextname = argv[2];
595 if (0 == strcmp (contextname, "findutils"))
596 context = CONTEXT_FINDUTILS;
597 else if (0 == strcmp (contextname, "generic"))
598 context = CONTEXT_GENERIC;
599 else if (0 == strcmp (contextname, "all"))
600 context = CONTEXT_ALL;
603 fprintf (stderr, "Unexpected context %s\n",
609 describe_all (contextname, context, up);