1 /* regexprops.c -- document the properties of the regular expressions
4 Copyright 2005, 2007, 2010, 2011 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 The output of this program is included in the GNU findutils source
23 distribution. The copying conditions for that file are generated
24 by the copying() function below.
27 /* Written by James Youngman, <jay@gnu.org>. */
29 /* config.h must be included first. */
43 #include "regextype.h"
46 output (const char *s, int escape)
61 content (const char *s)
67 literal (const char *s)
73 directive (const char *s)
79 comment (const char *s)
87 enum_item (const char *s)
96 begin_subsection (const char *name,
107 directive ("@node ");
109 content (" regular expression syntax");
112 directive ("@subsection ");
113 output ("@samp{", 0);
116 content (" regular expression syntax");
121 begintable_markup (char const *markup)
124 directive ("@table ");
133 directive ("@end table");
141 directive ("@enumerate");
149 directive ("@end enumerate");
161 describe_regex_syntax (int options)
164 content ("The character @samp{.} matches any single character");
165 if ( (options & RE_DOT_NEWLINE) == 0 )
167 content (" except newline");
169 if (options & RE_DOT_NOT_NULL)
171 if ( (options & RE_DOT_NEWLINE) == 0 )
176 content (" the null character");
181 if (!(options & RE_LIMITED_OPS))
183 begintable_markup ("@samp");
184 if (options & RE_BK_PLUS_QM)
187 content ("indicates that the regular expression should match one"
188 " or more occurrences of the previous atom or regexp. ");
190 content ("indicates that the regular expression should match zero"
191 " or one occurrence of the previous atom or regexp. ");
192 enum_item ("+ and ? ");
193 content ("match themselves. ");
198 content ("indicates that the regular expression should match one"
199 " or more occurrences of the previous atom or regexp. ");
201 content ("indicates that the regular expression should match zero"
202 " or one occurrence of the previous atom or regexp. ");
204 literal ("matches a @samp{+}");
206 literal ("matches a @samp{?}. ");
213 content ("Bracket expressions are used to match ranges of characters. ");
214 literal ("Bracket expressions where the range is backward, for example @samp{[z-a]}, are ");
215 if (options & RE_NO_EMPTY_RANGES)
221 if (options & RE_BACKSLASH_ESCAPE_IN_LISTS)
222 literal ("Within square brackets, @samp{\\} can be used to quote "
223 "the following character. ");
225 literal ("Within square brackets, @samp{\\} is taken literally. ");
227 if (options & RE_CHAR_CLASSES)
228 content ("Character classes are supported; for example "
229 "@samp{[[:digit:]]} will match a single decimal digit. ");
231 literal ("Character classes are not supported, so for example "
232 "you would need to use @samp{[0-9]} "
233 "instead of @samp{[[:digit:]]}. ");
235 if (options & RE_HAT_LISTS_NOT_NEWLINE)
237 literal ("Non-matching lists @samp{[^@dots{}]} do not ever match newline. ");
240 if (options & RE_NO_GNU_OPS)
242 content ("GNU extensions are not supported and so "
243 "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
245 "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively. ");
249 content ("GNU extensions are supported:");
251 enum_item ("@samp{\\w} matches a character within a word");
252 enum_item ("@samp{\\W} matches a character which is not within a word");
253 enum_item ("@samp{\\<} matches the beginning of a word");
254 enum_item ("@samp{\\>} matches the end of a word");
255 enum_item ("@samp{\\b} matches a word boundary");
256 enum_item ("@samp{\\B} matches characters which are not a word boundary");
257 enum_item ("@samp{\\`} matches the beginning of the whole input");
258 enum_item ("@samp{\\'} matches the end of the whole input");
265 if (options & RE_NO_BK_PARENS)
267 literal ("Grouping is performed with parentheses @samp{()}. ");
269 if (options & RE_UNMATCHED_RIGHT_PAREN_ORD)
270 literal ("An unmatched @samp{)} matches just itself. ");
274 literal ("Grouping is performed with backslashes followed by parentheses @samp{\\(}, @samp{\\)}. ");
277 if (options & RE_NO_BK_REFS)
279 content ("A backslash followed by a digit matches that digit. ");
283 literal ("A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis ");
284 if (options & RE_NO_BK_PARENS)
285 literal ("@samp{(}");
287 literal ("@samp{\\(}");
293 if (!(options & RE_LIMITED_OPS))
295 if (options & RE_NO_BK_VBAR)
296 literal ("The alternation operator is @samp{|}. ");
298 literal ("The alternation operator is @samp{\\|}. ");
302 if (options & RE_CONTEXT_INDEP_ANCHORS)
304 literal ("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified. ");
308 literal ("The character @samp{^} only represents the beginning of a string when it appears:");
310 enum_item ("\nAt the beginning of a regular expression");
311 enum_item ("After an open-group, signified by ");
312 if (options & RE_NO_BK_PARENS)
314 literal ("@samp{(}");
318 literal ("@samp{\\(}");
321 if (!(options & RE_LIMITED_OPS))
323 if (options & RE_NEWLINE_ALT)
324 enum_item ("After a newline");
326 if (options & RE_NO_BK_VBAR )
327 enum_item ("After the alternation operator @samp{|}");
329 enum_item ("After the alternation operator @samp{\\|}");
334 literal ("The character @samp{$} only represents the end of a string when it appears:");
336 enum_item ("At the end of a regular expression");
337 enum_item ("Before a close-group, signified by ");
338 if (options & RE_NO_BK_PARENS)
340 literal ("@samp{)}");
344 literal ("@samp{\\)}");
346 if (!(options & RE_LIMITED_OPS))
348 if (options & RE_NEWLINE_ALT)
349 enum_item ("Before a newline");
351 if (options & RE_NO_BK_VBAR)
352 enum_item ("Before the alternation operator @samp{|}");
354 enum_item ("Before the alternation operator @samp{\\|}");
359 if (!(options & RE_LIMITED_OPS) )
361 if ((options & RE_CONTEXT_INDEP_OPS)
362 && !(options & RE_CONTEXT_INVALID_OPS))
364 literal ("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression. ");
368 if (options & RE_BK_PLUS_QM)
369 literal ("@samp{\\*}, @samp{\\+} and @samp{\\?} ");
371 literal ("@samp{*}, @samp{+} and @samp{?} ");
373 if (options & RE_CONTEXT_INVALID_OPS)
375 content ("are special at any point in a regular expression except the following places, where they are not allowed:");
379 content ("are special at any point in a regular expression except:");
383 enum_item ("At the beginning of a regular expression");
384 enum_item ("After an open-group, signified by ");
385 if (options & RE_NO_BK_PARENS)
387 literal ("@samp{(}");
391 literal ("@samp{\\(}");
393 if (!(options & RE_LIMITED_OPS))
395 if (options & RE_NEWLINE_ALT)
396 enum_item ("After a newline");
398 if (options & RE_NO_BK_VBAR)
399 enum_item ("After the alternation operator @samp{|}");
401 enum_item ("After the alternation operator @samp{\\|}");
409 if (options & RE_INTERVALS)
411 if (options & RE_NO_BK_BRACES)
413 literal ("Intervals are specified by @samp{@{} and @samp{@}}. ");
414 if (options & RE_INVALID_INTERVAL_ORD)
416 literal ("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
420 literal ("Invalid intervals such as @samp{a@{1z} are not accepted. ");
425 literal ("Intervals are specified by @samp{\\@{} and @samp{\\@}}. ");
426 if (options & RE_INVALID_INTERVAL_ORD)
428 literal ("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}");
432 literal ("Invalid intervals such as @samp{a\\@{1z} are not accepted. ");
439 if (options & RE_NO_POSIX_BACKTRACKING)
441 content ("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match. ");
445 content ("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups. ");
454 static const char *copy_para[]=
456 "Copyright (C) 1994, 1996, 1998, 2000, 2001, 2003, 2004, 2005, 2006,"
457 ,"2007, 2009, 2010, 2011 Free Software Foundation, Inc."
459 ,"Permission is granted to copy, distribute and/or modify this document"
460 ,"under the terms of the GNU Free Documentation License, Version 1.3 or"
461 ,"any later version published by the Free Software Foundation; with no"
462 ,"Invariant Sections, with no Front-Cover Texts, and with no Back-Cover"
463 ,"Texts. A copy of the license is included in the ``GNU Free"
464 ,"Documentation License'' file as part of this distribution."
468 const char **s = copy_para;
474 ignore (int ix, const unsigned int context)
476 return 0 == (get_regex_type_context (ix) & context);
480 menu (unsigned int context)
485 output ("@menu\n", 0);
487 options = get_regex_type_flags (i),
488 name=get_regex_type_name (i);
491 if (!ignore (i, context))
495 content (" regular expression syntax");
500 output ("@end menu\n", 0);
506 get_next (unsigned int ix, unsigned int context)
509 while (get_regex_type_name (ix))
511 if (!ignore (ix, context))
513 next = get_regex_type_name (ix);
526 describe_all (const char *contextname,
527 unsigned int context,
530 const char *name, *next, *previous;
536 literal ("@c this regular expression description is for: ");
537 literal (contextname);
545 options = get_regex_type_flags (i),
546 name=get_regex_type_name (i);
549 if (ignore (i, context))
552 "Skipping regexp type %s for context %s\n",
558 next = get_next (i+1, context);
561 begin_subsection (name, next, previous, up);
562 parent = get_regex_type_synonym (i);
565 content ("This is a synonym for ");
566 content (get_regex_type_name (parent));
571 describe_regex_syntax (options);
580 main (int argc, char *argv[])
583 unsigned int context = CONTEXT_ALL;
584 const char *contextname = "all";
587 set_program_name (argv[0]);
589 set_program_name ("regexprops");
597 contextname = argv[2];
598 if (0 == strcmp (contextname, "findutils"))
599 context = CONTEXT_FINDUTILS;
600 else if (0 == strcmp (contextname, "generic"))
601 context = CONTEXT_GENERIC;
602 else if (0 == strcmp (contextname, "all"))
603 context = CONTEXT_ALL;
606 fprintf (stderr, "Unexpected context %s",
612 describe_all (contextname, context, up);